diff --git a/.travis.yml b/.travis.yml index fb8fa537..8c311eee 100644 --- a/.travis.yml +++ b/.travis.yml @@ -9,6 +9,7 @@ r_packages: r_github_packages: - OHDSI/DatabaseConnector + - OHDSI/OhdsiRTools before_install: - Rscript -e 'update.packages(ask = FALSE)' diff --git a/Achilles.Rproj b/Achilles.Rproj old mode 100644 new mode 100755 index 4f7190a4..72ebc0c4 --- a/Achilles.Rproj +++ b/Achilles.Rproj @@ -1,18 +1,18 @@ -Version: 1.0 - -RestoreWorkspace: No -SaveWorkspace: No -AlwaysSaveHistory: No - -EnableCodeIndexing: Yes -UseSpacesForTab: Yes -NumSpacesForTab: 2 -Encoding: UTF-8 - -RnwWeave: Sweave -LaTeX: pdfLaTeX - -BuildType: Package -PackageInstallArgs: --no-multiarch --with-keep.source -PackageCheckArgs: --no-multiarch -PackageRoxygenize: rd,namespace +Version: 1.0 + +RestoreWorkspace: No +SaveWorkspace: No +AlwaysSaveHistory: No + +EnableCodeIndexing: Yes +UseSpacesForTab: Yes +NumSpacesForTab: 2 +Encoding: UTF-8 + +RnwWeave: Sweave +LaTeX: pdfLaTeX + +BuildType: Package +PackageInstallArgs: --no-multiarch --with-keep.source +PackageCheckArgs: --no-multiarch +PackageRoxygenize: rd,collate,namespace diff --git a/DESCRIPTION b/DESCRIPTION old mode 100644 new mode 100755 index 9fdea438..1372f95b --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,20 +1,23 @@ -Package: Achilles -Type: Package -Title: Creates descriptive statistics summary for an entire OMOP CDM instance -Version: 1.5 -Date: 2016-10-20 -Author: Patrick Ryan, Martijn Schuemie, Vojtech Huser, Chris Knoll -Maintainer: Patrick Ryan -LazyData: true -Description: creates descriptive statistics summary for an entire OMOP CDM - instance. Since Aug 2016 only CDM v5 is actively being extended. Achilles - Heel component does data quality assesment. -Depends: - SqlRender, - DatabaseConnector (>= 1.11.4), - rjson -Suggests: - testthat -License: Apache License -Roxygen: list(wrap = FALSE) -RoxygenNote: 6.0.1 +Package: Achilles +Type: Package +Title: Creates descriptive statistics summary for an entire OMOP CDM instance +Version: 1.6 +Date: 2018-04-09 +Author: Patrick Ryan, Martijn Schuemie, Vojtech Huser, Chris Knoll, Ajit Londhe +Maintainer: Patrick Ryan +LazyData: true +Description: creates descriptive statistics summary for an entire OMOP CDM + instance. Since Aug 2016 only CDM v5 is actively being extended. Achilles + Heel component does data quality assesment. 
+Depends: + SqlRender, + DatabaseConnector (>= 2.0.0), + rjson, + OhdsiRTools +Suggests: + testthat, + dplyr, + R.utils +License: Apache License +Roxygen: list(wrap = FALSE) +RoxygenNote: 6.0.1 diff --git a/Dockerfile b/Dockerfile old mode 100644 new mode 100755 index 7584997d..ae49d2cb --- a/Dockerfile +++ b/Dockerfile @@ -19,6 +19,7 @@ RUN echo deb http://ppa.launchpad.net/marutter/rrutter/ubuntu trusty main >> /et r-cran-dbi \ r-cran-ffbase \ r-cran-urltools \ + libxml2-dev \ littler \ openjdk-7-jdk \ && rm -rf /var/lib/apt/lists/* \ @@ -32,14 +33,32 @@ RUN echo "en_US.UTF-8 UTF-8" >> /etc/locale.gen \ ENV LC_ALL en_US.UTF-8 ENV LANG en_US.UTF-8 +# Install OHDSI/OhdsiRTools +RUN R -e "install.packages( \ + c( \ + 'XML', \ + 'RJSONIO' \ + ), \ + repos='http://cran.rstudio.com/', \ +) " + # Install Achilles requirements that need to be installed from source RUN echo 'options(repos=structure(c(CRAN="http://cran.cnr.berkeley.edu/")))' > /root/.Rprofile && \ /usr/share/doc/littler/examples/install.r remotes && \ /usr/share/doc/littler/examples/install.r docopt && \ + /usr/share/doc/littler/examples/install.r openxlsx && \ + /usr/share/doc/littler/examples/install.r httr && \ + /usr/share/doc/littler/examples/install.r rjson && \ + /usr/share/doc/littler/examples/install.r R.oo && \ + /usr/share/doc/littler/examples/install.r formatR && \ + /usr/share/doc/littler/examples/install.r R.utils && \ + /usr/share/doc/littler/examples/install.r snow && \ + /usr/share/doc/littler/examples/install.r mailR && \ /usr/share/doc/littler/examples/installGithub.r \ OHDSI/SqlRender \ OHDSI/DatabaseConnectorJars \ OHDSI/DatabaseConnector \ + OHDSI/OhdsiRTools \ && rm -rf /tmp/downloaded_packages/ /tmp/*.rds # Configure workspace diff --git a/NAMESPACE b/NAMESPACE old mode 100644 new mode 100755 index 4c0f5fec..9ac04c14 --- a/NAMESPACE +++ b/NAMESPACE @@ -3,8 +3,9 @@ export(achilles) export(achillesHeel) export(addDatasource) -export(conceptHierarchy) +export(createConceptHierarchy) export(createIndices) +export(dropAllScratchTables) export(exportConditionEraToJson) export(exportConditionToJson) export(exportDashboardToJson) @@ -25,3 +26,4 @@ export(fetchAchillesAnalysisResults) export(fetchAchillesHeelResults) export(getAnalysisDetails) export(showReportTypes) +export(validateSchema) diff --git a/R/Achilles-internal.R b/R/Achilles-internal.R old mode 100644 new mode 100755 diff --git a/R/Achilles-package.R b/R/Achilles-package.R old mode 100644 new mode 100755 diff --git a/R/Achilles.R b/R/Achilles.R old mode 100644 new mode 100755 index 53034869..f7046cbb --- a/R/Achilles.R +++ b/R/Achilles.R @@ -1,382 +1,1055 @@ -# @file Achilles -# -# Copyright 2014 Observational Health Data Sciences and Informatics -# -# This file is part of Achilles -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# @author Observational Health Data Sciences and Informatics -# @author Martijn Schuemie -# @author Patrick Ryan - -#' Get all analysis details -#' -#' @details -#' Get a list of all analyses with their analysis IDs and strata. 
-#' -#' @return -#' A data.frame with the analysis details. -#' -#' @export -getAnalysisDetails <- function() { - pathToCsv <- system.file("csv", "analysisDetails.csv", package = "Achilles") - analysisDetails <- utils::read.csv(pathToCsv) - return(analysisDetails) -} - -#' The main Achilles analysis -#' -#' @description -#' \code{achilles} creates descriptive statistics summary for an entire OMOP CDM instance. -#' -#' @details -#' \code{achilles} creates descriptive statistics summary for an entire OMOP CDM instance. -#' -#' @param connectionDetails An R object of type ConnectionDetail (details for the function that contains server info, database type, optionally username/password, port) -#' @param cdmDatabaseSchema string name of database schema that contains OMOP CDM. On SQL Server, this should specifiy both the database and the schema, so for example 'cdm_instance.dbo'. -#' @param oracleTempSchema For Oracle only: the name of the database schema where you want all temporary tables to be managed. Requires create/insert permissions to this database. -#' @param resultsDatabaseSchema string name of database schema that we can write results to. Default is cdmDatabaseSchema. On SQL Server, this should specifiy both the database and the schema, so for example 'results.dbo'. -#' @param sourceName string name of the database, as recorded in results -#' @param analysisIds (optional) a vector containing the set of Achilles analysisIds for which results will be generated. -#' If not specified, all analyses will be executed. Use \code{\link{getAnalysisDetails}} to get a list of all Achilles analyses and their Ids. -#' @param createTable If true, new results tables will be created in the results schema. If not, the tables are assumed to already exists, and analysis results will be added -#' @param smallcellcount To avoid patient identifiability, cells with small counts (<= smallcellcount) are deleted. -#' @param cdmVersion Define the OMOP CDM version used: currently support "4" and "5". Default = "4" -#' @param runHeel Boolean to determine if Achilles Heel data quality reporting will be produced based on the summary statistics. Default = TRUE -#' @param validateSchema Boolean to determine if CDM Schema Validation should be run. This could be very slow. Default = FALSE -#' @param vocabDatabaseSchema string name of database schema that contains OMOP Vocabulary. Default is cdmDatabaseSchema. On SQL Server, this should specifiy both the database and the schema, so for example 'results.dbo'. -#' @param sqlOnly Boolean to determine if the SQL generated by this function call should be executed or simply returned as a string. DEFAULT = FALSE indicating that the SQL code should be executed. -#' @param runCostAnalysis Boolean to determine if cost analysis should be run. Note: only works on CDM v5.0 style cost tables. -#' @param conceptHierarchy Boolean to determine if the concept_hierarchy result table should be created, for use by Atlas treemaps. Note: only works on CDM v5.0 tables. -#' @param createIndices Boolean to determine if indices should be created on the resulting Achilles and concept_hierarchy table. 
Default= TRUE -#' -#' @return An object of type \code{achillesResults} containing details for connecting to the database containing the results -#' @examples \dontrun{ -#' connectionDetails <- DatabaseConnector::createConnectionDetails(dbms="sql server", server="myserver") -#' achillesResults <- achilles(connectionDetails, cdmDatabaseSchema="cdm4_sim", resultsDatabaseSchema="scratch", sourceName="TestDB", validateSchema="TRUE", vocabDatabaseSchema="vocabulary") -#' fetchAchillesAnalysisResults(connectionDetails, "scratch", 106) -#' } -#' @export -achilles <- function (connectionDetails, - cdmDatabaseSchema, - oracleTempSchema = cdmDatabaseSchema, - resultsDatabaseSchema = cdmDatabaseSchema, - sourceName = "", - analysisIds, - createTable = TRUE, - smallcellcount = 5, - cdmVersion = "4", - runHeel = TRUE, - validateSchema = FALSE, - vocabDatabaseSchema = cdmDatabaseSchema, - runCostAnalysis = FALSE, - sqlOnly = FALSE, - conceptHierarchy = TRUE, - createIndices = TRUE){ - - if (cdmVersion == "4") { - stop("Error: Invalid CDM Version number, version 4 is no longer supported.") - #achillesFile <- "Achilles_v4.sql" - #heelFile <- "AchillesHeel_v4.sql" - #hierarchyFile = "" - #conceptHierarchy = FALSE - } else if (cdmVersion == "5") { - achillesFile <- "Achilles_v5.sql" - heelFile <- "AchillesHeel_v5.sql" - hierarchyFile = "ConceptHierarchy_v5.sql" - } else { - stop("Error: Invalid CDM Version number, use 4 or 5") - } - - if (missing(analysisIds)) - analysisIds = getAnalysisDetails()$ANALYSIS_ID - -# cdmDatabase <- strsplit(cdmDatabaseSchema ,"\\.")[[1]][1] -# resultsDatabase <- strsplit(resultsDatabaseSchema ,"\\.")[[1]][1] -# vocabDatabase <- strsplit(vocabDatabaseSchema ,"\\.")[[1]][1] - - achillesSql <- SqlRender::loadRenderTranslateSql(sqlFilename = achillesFile, - packageName = "Achilles", - dbms = connectionDetails$dbms, - oracleTempSchema = oracleTempSchema, - # cdm_database = cdmDatabase, - cdm_database_schema = cdmDatabaseSchema, - # results_database = resultsDatabase, - results_database_schema = resultsDatabaseSchema, - source_name = sourceName, - list_of_analysis_ids = analysisIds, - createTable = createTable, - smallcellcount = smallcellcount, - validateSchema = validateSchema, - # vocab_database = vocabDatabase, - # vocab_database_schema = vocabDatabaseSchema, - runCostAnalysis = runCostAnalysis - ) - - if (sqlOnly) { - outputFolder <- "output"; - - if (!file.exists(outputFolder)) - dir.create(outputFolder); - SqlRender::writeSql(achillesSql,paste(outputFolder, achillesFile, sep="/")); - - writeLines(paste("Achilles sql generated in: ", paste(outputFolder, achillesFile, sep="/"))); - - return(); - } else { - conn <- DatabaseConnector::connect(connectionDetails) - writeLines("Executing multiple queries. This could take a while") - #SqlRender::writeSql(achillesSql, 'achillesDebug.sql'); - DatabaseConnector::executeSql(conn,achillesSql) - writeLines(paste("Done. 
Achilles results can now be found in",resultsDatabaseSchema)) - } - - if (runHeel) { - heelSql <- SqlRender::loadRenderTranslateSql(sqlFilename = heelFile, - packageName = "Achilles", - dbms = connectionDetails$dbms, - oracleTempSchema = oracleTempSchema, - cdm_database_schema = cdmDatabaseSchema, - # results_database = resultsDatabase, - results_database_schema = resultsDatabaseSchema, - source_name = sourceName, - #list_of_analysis_ids = analysisIds, - createTable = createTable, - smallcellcount = smallcellcount, - # vocab_database = vocabDatabase, - vocab_database_schema = vocabDatabaseSchema - ) - - writeLines("Executing Achilles Heel. This could take a while") - DatabaseConnector::executeSql(conn,heelSql) - writeLines(paste("Done. Achilles Heel results can now be found in",resultsDatabaseSchema)) - - } else heelSql='HEEL EXECUTION SKIPPED PER USER REQUEST' - - if (conceptHierarchy) { - hierarchySql <- SqlRender::loadRenderTranslateSql(sqlFilename = hierarchyFile, - packageName = "Achilles", - dbms = connectionDetails$dbms, - oracleTempSchema = oracleTempSchema, - results_database_schema = resultsDatabaseSchema, - vocab_database_schema = vocabDatabaseSchema - ) - - writeLines("Executing Concept Hierarchy creation. This could take a while") - DatabaseConnector::executeSql(conn,hierarchySql) - writeLines(paste("Done. Concept Hierarchy table can now be found in",resultsDatabaseSchema)) - - } else heelSql='CONCEPT HIERARCHY EXECUTION SKIPPED PER USER REQUEST' - - if (createIndices) { - indicesSql <- createIndices(connectionDetails = connectionDetails, - resultsDatabaseSchema = resultsDatabaseSchema, - cdmVersion = cdmVersion) - } else { - indicesSql = "INDEX CREATION SKIPPED PER USER REQUEST" - } - - DatabaseConnector::disconnect(conn) - - resultsConnectionDetails <- connectionDetails - resultsConnectionDetails$schema = resultsDatabaseSchema - result <- list(resultsConnectionDetails = resultsConnectionDetails, - resultsTable = "ACHILLES_results", - resultsDistributionTable ="ACHILLES_results_dist", - analysis_table = "ACHILLES_analysis", - sourceName = sourceName, - analysisIds = analysisIds, - AchillesSql = achillesSql, - HeelSql = heelSql, #if runHeel is false - this assignment fails - causes error of the whole function (adding else) - call = match.call()) - class(result) <- "achillesResults" - result -} - -#' execution of data quality rules -#' -#' @description -#' \code{achillesHeel} executes data quality rules (or checks) on pre-computed analyses (or measures). -#' -#' @details -#' \code{achillesHeel} contains number of rules (authored in SQL) that are executed againts achilles results tables. -#' -#' @param connectionDetails An R object of type ConnectionDetail (details for the function that contains server info, database type, optionally username/password, port) -#' @param cdmDatabaseSchema string name of database schema that contains OMOP CDM. On SQL Server, this should specifiy both the database and the schema, so for example 'cdm_instance.dbo'. -#' @param oracleTempSchema For Oracle only: the name of the database schema where you want all temporary tables to be managed. Requires create/insert permissions to this database. -#' @param resultsDatabaseSchema string name of database schema that we can write results to. Default is cdmDatabaseSchema. On SQL Server, this should specifiy both the database and the schema, so for example 'results.dbo'. -#' @param cdmVersion Define the OMOP CDM version used: currently support "4" and "5". 
Default = "4" -#' @param vocabDatabaseSchema string name of database schema that contains OMOP Vocabulary. Default is cdmDatabaseSchema. On SQL Server, this should specifiy both the database and the schema, so for example 'results.dbo'. -#' -#' @return nothing is returned -#' @examples \dontrun{ -#' connectionDetails <- DatabaseConnector::createConnectionDetails(dbms="sql server", server="myserver") -#' achillesHeel <- achilles(connectionDetails, cdmDatabaseSchema="mycdm", resultsDatabaseSchema="scratch", vocabDatabaseSchema="vocabulary") -#' } -#' @export -achillesHeel <- function (connectionDetails, - cdmDatabaseSchema, - oracleTempSchema = cdmDatabaseSchema, - resultsDatabaseSchema = cdmDatabaseSchema, - cdmVersion = "5", - vocabDatabaseSchema = cdmDatabaseSchema){ - -# resultsDatabase <- strsplit(resultsDatabaseSchema ,"\\.")[[1]][1] -# vocabDatabase <- strsplit(vocabDatabaseSchema ,"\\.")[[1]][1] - - if (cdmVersion == "4") { - heelFile <- "AchillesHeel_v4.sql" - } else if (cdmVersion == "5") { - heelFile <- "AchillesHeel_v5.sql" - } else { - stop("Error: Invalid CDM Version number, use 4 or 5") - } - - heelSql <- SqlRender::loadRenderTranslateSql(sqlFilename = heelFile, - packageName = "Achilles", - dbms = connectionDetails$dbms, - oracleTempSchema = oracleTempSchema, - cdm_database_schema = cdmDatabaseSchema, - # results_database = resultsDatabase, - results_database_schema = resultsDatabaseSchema, - # vocab_database = vocabDatabase, - vocab_database_schema = vocabDatabaseSchema - ); - - conn <- DatabaseConnector::connect(connectionDetails); - writeLines("Executing Achilles Heel. This could take a while"); - DatabaseConnector::executeSql(conn,heelSql); - DatabaseConnector::disconnect(conn); - writeLines(paste("Done. Achilles Heel results can now be found in",resultsDatabaseSchema)) -} - -#new function to extract Heel resutls now when there are extra columns from inside R -#' @export -fetchAchillesHeelResults <- function (connectionDetails, resultsDatabaseSchema){ - connectionDetails$schema = resultsDatabaseSchema - conn <- DatabaseConnector::connect(connectionDetails) - - - sql <- "SELECT * FROM ACHILLES_heel_results" - sql <- SqlRender::renderSql(sql)$sql - res <- DatabaseConnector::querySql(conn,sql) - DatabaseConnector::disconnect(conn) - res -} - -#' execution of concept hierarchy creation -#' -#' @description -#' \code{conceptHierarchy} executes script to create the concept_hierarchy table. -#' -#' @details -#' \code{conceptHierarchy} executes script to create the concept_hierarchy table in the results schema, to be used by Atlas for treemap displays. -#' -#' @param connectionDetails An R object of type ConnectionDetail (details for the function that contains server info, database type, optionally username/password, port) -#' @param vocabDatabaseSchema string name of database schema that contains OMOP Vocabulary. Default is vocabDatabaseSchema. On SQL Server, this should specifiy both the database and the schema, so for example 'results.dbo'. -#' @param oracleTempSchema For Oracle only: the name of the database schema where you want all temporary tables to be managed. Requires create/insert permissions to this database. -#' @param resultsDatabaseSchema string name of database schema that we can write results to. Default is vocabDatabaseSchema. On SQL Server, this should specifiy both the database and the schema, so for example 'results.dbo'. -#' @param cdmVersion Define the OMOP CDM version used: currently support only "5". 
Default = "5" -#' -#' @return nothing is returned -#' @examples \dontrun{ -#' connectionDetails <- DatabaseConnector::createConnectionDetails(dbms="sql server", server="myserver") -#' conceptHierarchy <- conceptHierarchy(connectionDetails, resultsDatabaseSchema="scratch", vocabDatabaseSchema="vocabulary") -#' } -#' @export -conceptHierarchy <- function (connectionDetails, - vocabDatabaseSchema, - oracleTempSchema = vocabDatabaseSchema, - resultsDatabaseSchema = vocabDatabaseSchema, - cdmVersion = "5"){ - - # resultsDatabase <- strsplit(resultsDatabaseSchema ,"\\.")[[1]][1] - # vocabDatabase <- strsplit(vocabDatabaseSchema ,"\\.")[[1]][1] - - if (cdmVersion == "5") { - hierarchyFile = "ConceptHierarchy_v5.sql" - } else { - stop("Error: Invalid CDM Version number, only version 5 supported") - } - - hierarchySql <- SqlRender::loadRenderTranslateSql(sqlFilename = hierarchyFile, - packageName = "Achilles", - dbms = connectionDetails$dbms, - oracleTempSchema = oracleTempSchema, - #cdm_database_schema = cdmDatabaseSchema, - results_database_schema = resultsDatabaseSchema, - vocab_database_schema = vocabDatabaseSchema - ); - - conn <- DatabaseConnector::connect(connectionDetails); - writeLines("Executing Concept Hierarchy creation. This could take a while") - DatabaseConnector::executeSql(conn,hierarchySql) - writeLines(paste("Done. Concept Hierarchy table can now be found in",resultsDatabaseSchema)) -} - - -#' Create indices on Achilles results tables and concept hierarchy -#' -#' @description -#' \code{createIndices} executes script to create indicies on the Achilles tables. -#' -#' @details -#' \code{createIndices} executes script to create indicies on the Achilles tables. -#' -#' @param connectionDetails An R object of type ConnectionDetail (details for the function that contains server info, database type, optionally username/password, port) -#' @param resultsDatabaseSchema string name of database schema that holds the results tables for indexing. On SQL Server, this should specifiy both the database and the schema, so for example 'results.dbo'. -#' @param oracleTempSchema For Oracle only: the name of the database schema where you want all temporary tables to be managed. Requires create/insert permissions to this database. -#' @param sqlOnly if TRUE, only the SQL code will be generated -#' @param cdmVersion Define the OMOP CDM version used: currently support only "5". Default = "5" -#' -#' @return nothing is returned -#' @examples \dontrun{ -#' connectionDetails <- DatabaseConnector::createConnectionDetails(dbms="sql server", server="myserver") -#' createIndices <- createIndices(connectionDetails, resultsDatabaseSchema="scratch") -#' } -#' @export -createIndices <- function (connectionDetails, - resultsDatabaseSchema, - oracleTempSchema = resultsDatabaseSchema, - sqlOnly = FALSE, - cdmVersion = "5"){ - - if (cdmVersion == "5") { - sqlFile = "Achilles_indices_v5.sql" - } else { - stop("Error: Invalid CDM Version number, only version 5 supported") - } - - if (connectionDetails$dbms=='redshift') { - stop("Error: RedShift does not support creating indices") - } - - is_pdw <- (connectionDetails$dbms == "pdw") - - indicesSql <- SqlRender::loadRenderTranslateSql(sqlFilename = sqlFile, - packageName = "Achilles", - dbms = connectionDetails$dbms, - oracleTempSchema = oracleTempSchema, - is_pdw = is_pdw, - results_database_schema = resultsDatabaseSchema); - - if (!sqlOnly) { - conn <- DatabaseConnector::connect(connectionDetails); - writeLines("Executing indices creation. 
This could take a while") - DatabaseConnector::executeSql(conn,indicesSql) - writeLines(paste("Done. Indices created in",resultsDatabaseSchema)) - } - return(indicesSql) -} - +# @file Achilles +# +# Copyright 2018 Observational Health Data Sciences and Informatics +# +# This file is part of Achilles +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# @author Observational Health Data Sciences and Informatics +# @author Martijn Schuemie +# @author Patrick Ryan +# @author Vojtech Huser +# @author Chris Knoll +# @author Ajit Londhe + + +#' The main Achilles analyses (for v5.x) +#' +#' @description +#' \code{achilles} creates descriptive statistics summary for an entire OMOP CDM instance. +#' +#' @details +#' \code{achilles} creates descriptive statistics summary for an entire OMOP CDM instance. +#' +#' @param connectionDetails An R object of type \code{connectionDetails} created using the function \code{createConnectionDetails} in the \code{DatabaseConnector} package. +#' @param cdmDatabaseSchema Fully qualified name of database schema that contains OMOP CDM schema. +#' On SQL Server, this should specifiy both the database and the schema, so for example, on SQL Server, 'cdm_instance.dbo'. +#' @param resultsDatabaseSchema Fully qualified name of database schema that we can write final results to. Default is cdmDatabaseSchema. +#' On SQL Server, this should specifiy both the database and the schema, so for example, on SQL Server, 'cdm_results.dbo'. +#' @param scratchDatabaseSchema Fully qualified name of the database schema that will store all of the intermediate scratch tables, so for example, on SQL Server, 'cdm_scratch.dbo'. +#' Must be accessible to/from the cdmDatabaseSchema and the resultsDatabaseSchema. Default is resultsDatabaseSchema. +#' Making this "#" will run Achilles in single-threaded mode and use temporary tables instead of permanent tables. +#' @param vocabDatabaseSchema String name of database schema that contains OMOP Vocabulary. Default is cdmDatabaseSchema. On SQL Server, this should specifiy both the database and the schema, so for example 'results.dbo'. +#' @param sourceName String name of the data source name. If blank, CDM_SOURCE table will be queried to try to obtain this. +#' @param analysisIds (OPTIONAL) A vector containing the set of Achilles analysisIds for which results will be generated. +#' If not specified, all analyses will be executed. Use \code{\link{getAnalysisDetails}} to get a list of all Achilles analyses and their Ids. +#' @param createTable If true, new results tables will be created in the results schema. If not, the tables are assumed to already exist, and analysis results will be inserted (slower on MPP). +#' @param smallCellCount To avoid patient identifiability, cells with small counts (<= smallCellCount) are deleted. Set to NULL if you don't want any deletions. +#' @param cdmVersion Define the OMOP CDM version used: currently supports v5 and above. Use major release number or minor number only (e.g. 
5, 5.3) +#' @param runHeel Boolean to determine if Achilles Heel data quality reporting will be produced based on the summary statistics. Default = TRUE +#' @param validateSchema Boolean to determine if CDM Schema Validation should be run. Default = FALSE +#' @param runCostAnalysis Boolean to determine if cost analysis should be run. Note: only works on v5.1+ style cost tables. +#' @param conceptHierarchy Boolean to determine if the concept_hierarchy result table should be created, for use by Atlas treemaps. +#' Please note: this table creation only requires the Vocabulary, not the CDM itself. +#' You could run this once for 1 Vocab version, and then copy the table to all CDMs using that Vocab. +#' @param createIndices Boolean to determine if indices should be created on the resulting Achilles and concept_hierarchy table. Default= TRUE +#' @param numThreads (OPTIONAL, multi-threaded mode) The number of threads to use to run Achilles in parallel. Default is 1 thread. +#' @param tempAchillesPrefix (OPTIONAL, multi-threaded mode) The prefix to use for the scratch Achilles analyses tables. Default is "tmpach" +#' @param dropScratchTables (OPTIONAL, multi-threaded mode) TRUE = drop the scratch tables (may take time depending on dbms), FALSE = leave them in place for later removal. +#' @param sqlOnly Boolean to determine if Achilles should be fully executed. TRUE = just generate SQL files, don't actually run, FALSE = run Achilles +#' @param outputFolder (OPTIONAL, SQL-only mode) Path to store SQL files +#' @param logMultiThreadPerformance (OPTIONAL, multi-threaded mode) Should an RDS file of execution times for every analysis query be created in the outputFolder? +#' +#' @return An object of type \code{achillesResults} containing details for connecting to the database containing the results +#' @examples \dontrun{ +#' connectionDetails <- createConnectionDetails(dbms="sql server", server="some_server") +#' achillesResults <- achilles(connectionDetails = connectionDetails, +#' cdmDatabaseSchema = "cdm", +#' resultsDatabaseSchema="results", +#' scratchDatabaseSchema="scratch", +#' sourceName="Some Source", +#' cdmVersion = "5.3", +#' runCostAnalysis = TRUE, +#' numThreads = 10) +#' } +#' @export +achilles <- function (connectionDetails, + cdmDatabaseSchema, + oracleTempSchema = cdmDatabaseSchema, + resultsDatabaseSchema = cdmDatabaseSchema, + scratchDatabaseSchema = resultsDatabaseSchema, + vocabDatabaseSchema = cdmDatabaseSchema, + sourceName = "", + analysisIds, + createTable = TRUE, + smallCellCount = 5, + cdmVersion = "5", + runHeel = TRUE, + validateSchema = FALSE, + runCostAnalysis = FALSE, + conceptHierarchy = TRUE, + createIndices = TRUE, + numThreads = 1, + tempAchillesPrefix = "tmpach", + dropScratchTables = TRUE, + sqlOnly = FALSE, + outputFolder = "output", + logMultiThreadPerformance = FALSE) { + + achillesSql <- c() + + # Try to get CDM Version if not provided ---------------------------------------------------------------------------------------- + + if (missing(cdmVersion)) { + cdmVersion <- .getCdmVersion(connectionDetails, cdmDatabaseSchema) + } + + # Check CDM version is valid --------------------------------------------------------------------------------------------------- + + if (compareVersion(a = as.character(cdmVersion), b = "5") < 0) { + stop("Error: Invalid CDM Version number; this function is only for v5 and above. 
+ See Achilles Git Repo to find v4 compatible version of Achilles.") + } + + # Establish folder paths -------------------------------------------------------------------------------------------------------- + + if (sqlOnly | logMultiThreadPerformance) { + if (!dir.exists(outputFolder)) { + dir.create(path = outputFolder, recursive = TRUE) + } + unlink(file.path(outputFolder, "achillesLog.rds")) + } + + + # (optional) Validate CDM schema -------------------------------------------------------------------------------------------------- + + if (validateSchema) { + validateSchema(connectionDetails = connectionDetails, + cdmDatabaseSchema = cdmDatabaseSchema, + resultsDatabaseSchema = resultsDatabaseSchema, + runCostAnalysis = runCostAnalysis, + cdmVersion = cdmVersion) + } + + # Get source name if none provided -------------------------------------------------- + + if (missing(sourceName) & !sqlOnly) { + sql <- SqlRender::renderSql(sql = "select top 1 cdm_source_name + from @cdmDatabaseSchema.cdm_source", + cdmDatabaseSchema = cdmDatabaseSchema)$sql + connection <- DatabaseConnector::connect(connectionDetails = connectionDetails) + sourceName <- tryCatch({ + s <- DatabaseConnector::querySql(connection = connection, sql = sql) + }, error = function (e) { + s <- "" + }, finally = { + DatabaseConnector::disconnect(connection = connection) + rm(connection) + }) + } + + # Obtain analyses to run -------------------------------------------------------------------------------------------------------- + + analysisDetails <- getAnalysisDetails() + if (!missing(analysisIds)) { + analysisDetails <- analysisDetails[analysisDetails$ANALYSIS_ID %in% analysisIds, ] + } + + if (!runCostAnalysis) { + analysisDetails <- analysisDetails[analysisDetails$COST == 0, ] + } + + # Check if cohort table is present + + connection <- DatabaseConnector::connect(connectionDetails = connectionDetails) + resultsTables <- lapply(DatabaseConnector::getTableNames(connection = connection, + databaseSchema = resultsDatabaseSchema), function(t) tolower(t)) + DatabaseConnector::disconnect(connection = connection) + + if (!"cohort" %in% resultsTables) { + analysisDetails <- analysisDetails[!analysisDetails$ANALYSIS_ID %in% c(1700,1701),] + } + + resultsTables <- list( + list(detailType = "results", + tablePrefix = tempAchillesPrefix, + schema = read.csv(file = system.file("csv", "schemas", "schema_achilles_results.csv", package = "Achilles"), + header = TRUE), + analysisIds = analysisDetails[analysisDetails$DISTRIBUTION <= 0, ]$ANALYSIS_ID), + list(detailType = "results_dist", + tablePrefix = sprintf("%1s_%2s", tempAchillesPrefix, "dist"), + schema = read.csv(file = system.file("csv", "schemas", "schema_achilles_results_dist.csv", package = "Achilles"), + header = TRUE), + analysisIds = analysisDetails[abs(analysisDetails$DISTRIBUTION) == 1, ]$ANALYSIS_ID)) + + # Initialize thread and scratchDatabaseSchema settings and verify OhdsiRTools installed --------------------------- + + schemaDelim <- "." 
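+  # Single-threaded mode (numThreads == 1 or scratchDatabaseSchema == "#") keeps one
+  # connection open for the whole run and writes scratch results to temp tables
+  # (the "#" schema plus the "s_" delimiter renders names such as #s_tmpach_<analysisId>).
+  # Multi-threaded mode writes permanent scratch tables to scratchDatabaseSchema and
+  # relies on OhdsiRTools to build the cluster that runs analyses in parallel.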
+ + if (numThreads == 1 || scratchDatabaseSchema == "#") { + numThreads <- 1 + scratchDatabaseSchema <- "#" + schemaDelim <- "s_" + + # first invocation of the connection, to persist throughout to maintain temp tables + connection <- DatabaseConnector::connect(connectionDetails = connectionDetails) + } else { + if (!.is_installed("OhdsiRTools")) { + writeLines("Installing OhdsiRTools for multi-threading support") + devtools::install_github("OHDSI/OhdsiRTools") + } + } + + # Create analysis table ------------------------------------------------------------- + + if (createTable) { + analysesSqls <- apply(analysisDetails, 1, function(analysisDetail) { + SqlRender::renderSql("select @analysisId as analysis_id, '@analysisName' as analysis_name, + '@stratum1Name' as stratum_1_name, '@stratum2Name' as stratum_2_name, + '@stratum3Name' as stratum_3_name, '@stratum4Name' as stratum_4_name, + '@stratum5Name' as stratum_5_name", + analysisId = analysisDetail["ANALYSIS_ID"], + analysisName = analysisDetail["ANALYSIS_NAME"], + stratum1Name = analysisDetail["STRATUM_1_NAME"], + stratum2Name = analysisDetail["STRATUM_2_NAME"], + stratum3Name = analysisDetail["STRATUM_3_NAME"], + stratum4Name = analysisDetail["STRATUM_4_NAME"], + stratum5Name = analysisDetail["STRATUM_5_NAME"])$sql + }) + + sql <- SqlRender::loadRenderTranslateSql(sqlFilename = "analyses/create_analysis_table.sql", + packageName = "Achilles", + dbms = connectionDetails$dbms, + warnOnMissingParameters = FALSE, + resultsDatabaseSchema = resultsDatabaseSchema, + analysesSqls = paste(analysesSqls, collapse = " \nunion all\n ")) + + achillesSql <- c(achillesSql, sql) + + if (!sqlOnly) { + if (numThreads == 1) { + # connection is already alive + DatabaseConnector::executeSql(connection = connection, sql = sql) + } else { + connection <- DatabaseConnector::connect(connectionDetails = connectionDetails) + DatabaseConnector::executeSql(connection = connection, sql = sql) + DatabaseConnector::disconnect(connection = connection) + } + } + } + + # Generate cost analyses ---------------------------------------------------------- + + if (runCostAnalysis) { + + distCostAnalysisDetails <- analysisDetails[analysisDetails$COST == 1 & analysisDetails$DISTRIBUTION == 1, ] + costMappings <- read.csv(system.file("csv", "achilles", "achilles_cost_columns.csv", package = "Achilles"), + header = TRUE, stringsAsFactors = FALSE) + + drugCostMappings <- costMappings[costMappings$DOMAIN == "Drug", ] + procedureCostMappings <- costMappings[costMappings$DOMAIN == "Procedure", ] + + ## Create raw cost tables before generating cost analyses + + rawCostSqls <- lapply(c("Drug", "Procedure"), function(domainId) { + costMappings <- get(sprintf("%sCostMappings", tolower(domainId))) + + if (cdmVersion == "5") { + costColumns <- apply(costMappings, 1, function(c) { + sprintf("%1s as %2s", c["OLD"], c["CURRENT"]) + }) + } else { + costColumns <- costMappings$CURRENT + } + list( + analysisId = domainId, + sql = SqlRender::loadRenderTranslateSql(sqlFilename = "analyses/raw_cost_template.sql", + packageName = "Achilles", + dbms = connectionDetails$dbms, + warnOnMissingParameters = FALSE, + cdmDatabaseSchema = cdmDatabaseSchema, + scratchDatabaseSchema = scratchDatabaseSchema, + schemaDelim = schemaDelim, + tempAchillesPrefix = tempAchillesPrefix, + domainId = domainId, + domainTable = ifelse(domainId == "Drug", "drug_exposure", "procedure_occurrence"), + costColumns = paste(costColumns, collapse = ",")) + ) + }) + + achillesSql <- c(achillesSql, rawCostSqls) + + if (!sqlOnly) { + 
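+      # Materialize the raw Drug/Procedure cost tables before the distributed cost
+      # analyses query them: serially on the shared connection when numThreads == 1,
+      # otherwise on an OhdsiRTools cluster with one worker per domain, recording
+      # per-query execution times for the optional logMultiThreadPerformance log.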
if (numThreads == 1) { + for (rawCostSql in rawCostSqls) { + DatabaseConnector::executeSql(connection = connection, sql = rawCostSql$sql) + } + } else { + cluster <- OhdsiRTools::makeCluster(numberOfThreads = length(rawCostSqls), + singleThreadToMain = TRUE) + results <- OhdsiRTools::clusterApply(cluster = cluster, + x = rawCostSqls, + function(rawCostSql) { + start <- Sys.time() + + connection <- DatabaseConnector::connect(connectionDetails = connectionDetails) + DatabaseConnector::executeSql(connection = connection, sql = rawCostSql$sql) + DatabaseConnector::disconnect(connection = connection) + + df <- data.frame( + queryName = "Raw Cost", + queryId = rawCostSql$analysisId, + executionTime = Sys.time() - start + ) + }) + if (logMultiThreadPerformance) { + .logMtPerformance(results, outputFolder) + } + OhdsiRTools::stopCluster(cluster = cluster) + } + } + + distCostDrugSqls <- + apply(distCostAnalysisDetails[distCostAnalysisDetails$STRATUM_1_NAME == "drug_concept_id", ], 1, + function (analysisDetail) { + list(analysisId = analysisDetail["ANALYSIS_ID"][[1]], + sql = SqlRender::loadRenderTranslateSql(sqlFilename = "analyses/cost_distribution_template.sql", + packageName = "Achilles", + dbms = connectionDetails$dbms, + warnOnMissingParameters = FALSE, + cdmVersion = cdmVersion, + schemaDelim = schemaDelim, + cdmDatabaseSchema = cdmDatabaseSchema, + scratchDatabaseSchema = scratchDatabaseSchema, + costColumn = drugCostMappings[drugCostMappings$OLD == analysisDetail["DISTRIBUTED_FIELD"][[1]], ]$CURRENT, + domainId = "Drug", + domainTable = "drug_exposure", + analysisId = analysisDetail["ANALYSIS_ID"][[1]], + tempAchillesPrefix = tempAchillesPrefix) + ) + }) + + distCostProcedureSqls <- + apply(distCostAnalysisDetails[distCostAnalysisDetails$STRATUM_1_NAME == "procedure_concept_id", ], 1, + function (analysisDetail) { + list(analysisId = analysisDetail["ANALYSIS_ID"][[1]], + sql = SqlRender::loadRenderTranslateSql(sqlFilename = "analyses/cost_distribution_template.sql", + packageName = "Achilles", + dbms = connectionDetails$dbms, + warnOnMissingParameters = FALSE, + cdmVersion = cdmVersion, + schemaDelim = schemaDelim, + cdmDatabaseSchema = cdmDatabaseSchema, + scratchDatabaseSchema = scratchDatabaseSchema, + costColumn = procedureCostMappings[procedureCostMappings$OLD == analysisDetail["DISTRIBUTED_FIELD"][[1]], ]$CURRENT, + domainId = "Procedure", + domainTable = "procedure_occurrence", + analysisId = analysisDetail["ANALYSIS_ID"][[1]], + tempAchillesPrefix = tempAchillesPrefix) + ) + }) + + distCostAnalysisSqls <- c(distCostDrugSqls, distCostProcedureSqls) + + dropRawCostSqls <- lapply(c("Drug", "Procedure"), function(domainId) { + SqlRender::renderSql(sql = "drop table @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_@domainId_cost_raw;", + scratchDatabaseSchema = scratchDatabaseSchema, + schemaDelim = schemaDelim, + tempAchillesPrefix = tempAchillesPrefix, + domainId = domainId)$sql + }) + + achillesSql <- c(achillesSql, lapply(distCostAnalysisSqls, function(s) s$sql), dropRawCostSqls) + + if (!sqlOnly) { + if (numThreads == 1) { + for (distCostAnalysisSql in distCostAnalysisSqls) { + DatabaseConnector::executeSql(connection = connection, sql = distCostAnalysisSql$sql) + } + for (dropRawCostSql in dropRawCostSqls) { + DatabaseConnector::executeSql(connection = connection, sql = dropRawCostSql) + } + } else { + cluster <- OhdsiRTools::makeCluster(numberOfThreads = length(distCostAnalysisSqls), + singleThreadToMain = TRUE) + results <- OhdsiRTools::clusterApply(cluster = cluster, + x 
= distCostAnalysisSqls, + function(distCostAnalysisSql) { + start <- Sys.time() + + connection <- DatabaseConnector::connect(connectionDetails = connectionDetails) + DatabaseConnector::executeSql(connection = connection, sql = distCostAnalysisSql$sql) + DatabaseConnector::disconnect(connection = connection) + + df <- data.frame( + queryName = "Cost Analysis", + queryId = distCostAnalysisSql$analysisId, + executionTime = Sys.time() - start + ) + }) + if (logMultiThreadPerformance) { + .logMtPerformance(results, outputFolder) + } + OhdsiRTools::stopCluster(cluster = cluster) + + cluster <- OhdsiRTools::makeCluster(numberOfThreads = length(dropRawCostSqls), + singleThreadToMain = TRUE) + dummy <- OhdsiRTools::clusterApply(cluster = cluster, + x = dropRawCostSqls, + function(dropRawCostSql) { + connection <- DatabaseConnector::connect(connectionDetails = connectionDetails) + DatabaseConnector::executeSql(connection = connection, sql = dropRawCostSql) + DatabaseConnector::disconnect(connection = connection) + }) + OhdsiRTools::stopCluster(cluster = cluster) + } + } + } + + # Clean up existing scratch tables ----------------------------------------------- + + if (numThreads > 1 & !sqlOnly) { + # Drop the scratch tables + writeLines(sprintf("Dropping scratch Achilles tables from schema %s", scratchDatabaseSchema)) + + dropAllScratchTables(connectionDetails = connectionDetails, + scratchDatabaseSchema = scratchDatabaseSchema, + tempAchillesPrefix = tempAchillesPrefix, + numThreads = numThreads, + tableTypes = c("achilles", "concept_hierarchy")) + + writeLines(sprintf("Temporary Achilles tables removed from schema %s", scratchDatabaseSchema)) + } + + # Generate Main Analyses ---------------------------------------------------------------------------------------------------------------- + + mainAnalysisIds <- analysisDetails$ANALYSIS_ID + if (runCostAnalysis) { + # remove distributed cost analysis ids, since that's been executed already + mainAnalysisIds <- dplyr::anti_join(x = analysisDetails, y = distCostAnalysisDetails, by = "ANALYSIS_ID")$ANALYSIS_ID + } + mainSqls <- lapply(mainAnalysisIds, function(analysisId) { + list(analysisId = analysisId, + sql = .getAnalysisSql(analysisId = analysisId, + connectionDetails = connectionDetails, + schemaDelim = schemaDelim, + scratchDatabaseSchema = scratchDatabaseSchema, + cdmDatabaseSchema = cdmDatabaseSchema, + resultsDatabaseSchema = resultsDatabaseSchema, + cdmVersion = cdmVersion, + tempAchillesPrefix = tempAchillesPrefix, + resultsTables = resultsTables, + sourceName = sourceName, + numThreads = numThreads) + ) + }) + + achillesSql <- c(achillesSql, lapply(mainSqls, function(s) s$sql)) + + if (!sqlOnly) { + writeLines("Executing multiple queries. 
This could take a while") + + if (numThreads == 1) { + for (mainSql in mainSqls) { + DatabaseConnector::executeSql(connection = connection, sql = mainSql$sql) + } + } else { + cluster <- OhdsiRTools::makeCluster(numberOfThreads = numThreads, singleThreadToMain = TRUE) + results <- OhdsiRTools::clusterApply(cluster = cluster, + x = mainSqls, + function(mainSql) { + start <- Sys.time() + + connection <- DatabaseConnector::connect(connectionDetails = connectionDetails) + DatabaseConnector::executeSql(connection = connection, sql = mainSql$sql) + DatabaseConnector::disconnect(connection = connection) + + df <- data.frame( + queryName = "Main Analysis", + queryId = mainSql$analysisId, + executionTime = Sys.time() - start + ) + }) + if (logMultiThreadPerformance) { + .logMtPerformance(results, outputFolder) + } + OhdsiRTools::stopCluster(cluster = cluster) + } + } + + # Merge scratch tables into final analysis tables ------------------------------------------------------------------------------------------- + + include <- sapply(resultsTables, function(d) { any(d$analysisIds %in% analysisDetails$ANALYSIS_ID) }) + resultsTablesToMerge <- resultsTables[include] + + mergeSqls <- lapply(resultsTablesToMerge, function(table) { + .mergeAchillesScratchTables(resultsTable = table, + connectionDetails = connectionDetails, + analysisIds = analysisDetails$ANALYSIS_ID, + createTable = createTable, + schemaDelim = schemaDelim, + scratchDatabaseSchema = scratchDatabaseSchema, + resultsDatabaseSchema = resultsDatabaseSchema, + cdmVersion = cdmVersion, + tempAchillesPrefix = tempAchillesPrefix, + numThreads = numThreads, + smallCellCount = smallCellCount) + }) + + achillesSql <- c(achillesSql, mergeSqls) + + if (!sqlOnly) { + + writeLines("Merging scratch Achilles tables") + + if (numThreads == 1) { + for (sql in mergeSqls) { + DatabaseConnector::executeSql(connection = connection, sql = sql) + } + } else { + cluster <- OhdsiRTools::makeCluster(numberOfThreads = numThreads, singleThreadToMain = TRUE) + dummy <- OhdsiRTools::clusterApply(cluster = cluster, + x = mergeSqls, + function(sql) { + connection <- DatabaseConnector::connect(connectionDetails = connectionDetails) + DatabaseConnector::executeSql(connection = connection, sql = sql) + DatabaseConnector::disconnect(connection = connection) + }) + OhdsiRTools::stopCluster(cluster = cluster) + } + } + + if (!sqlOnly) { + writeLines(sprintf("Done. 
Achilles results can now be found in schema %s", resultsDatabaseSchema)) + } + + # Clean up scratch tables ----------------------------------------------- + + if (numThreads == 1) { + # Dropping the connection removes the temporary scratch tables if running in serial + DatabaseConnector::disconnect(connection = connection) + } else if (dropScratchTables & !sqlOnly) { + # Drop the scratch tables + writeLines(sprintf("Dropping scratch Achilles tables from schema %s", scratchDatabaseSchema)) + + dropAllScratchTables(connectionDetails = connectionDetails, + scratchDatabaseSchema = scratchDatabaseSchema, + tempAchillesPrefix = tempAchillesPrefix, + numThreads = numThreads, + tableTypes = c("achilles")) + + writeLines(sprintf("Temporary Achilles tables removed from schema %s", scratchDatabaseSchema)) + } + + # Create concept hierarchy table ----------------------------------------------------------------- + + hierarchySql <- "/* CONCEPT HIERARCHY EXECUTION SKIPPED PER USER REQUEST */" + if (conceptHierarchy) { + hierarchySql <- createConceptHierarchy(connectionDetails = connectionDetails, + resultsDatabaseSchema = resultsDatabaseSchema, + scratchDatabaseSchema = scratchDatabaseSchema, + vocabDatabaseSchema = vocabDatabaseSchema, + numThreads = numThreads, + tempAchillesPrefix = tempAchillesPrefix, + sqlOnly = sqlOnly) + } + achillesSql <- c(achillesSql, hierarchySql) + + + # Create indices ----------------------------------------------------------------- + + indicesSql <- "/* INDEX CREATION SKIPPED PER USER REQUEST */" + + if (createIndices) { + indicesSql <- createIndices(connectionDetails, + resultsDatabaseSchema, + sqlOnly) + } + achillesSql <- c(achillesSql, indicesSql) + + # Run Heel? --------------------------------------------------------------- + + heelSql <- "/* HEEL EXECUTION SKIPPED PER USER REQUEST */" + if (runHeel) { + heelSql <- achillesHeel(connectionDetails = connectionDetails, + cdmDatabaseSchema = cdmDatabaseSchema, + resultsDatabaseSchema = resultsDatabaseSchema, + scratchDatabaseSchema = scratchDatabaseSchema, + cdmVersion = cdmVersion, + sqlOnly = sqlOnly, + numThreads = numThreads, + tempHeelPrefix = "tmpheel", + dropScratchTables = dropScratchTables, + outputFolder = outputFolder) + heelSql <- paste(heelSql, collapse = "\n\n") + } + + achillesSql <- c(achillesSql, heelSql) + + achillesResults <- list(resultsConnectionDetails = connectionDetails, + resultsTable = "achilles_results", + resultsDistributionTable = "achilles_results_dist", + analysis_table = "achilles_analysis", + sourceName = sourceName, + analysisIds = analysisDetails$ANALYSIS_ID, + AchillesSql = paste(achillesSql, collapse = "\n\n"), + HeelSql = heelSql, + HierarchySql = hierarchySql, + IndicesSql = indicesSql, + call = match.call()) + + class(achillesResults) <- "achillesResults" + + if (sqlOnly) { + SqlRender::writeSql(sql = paste(achillesSql, collapse = "\n\n"), targetFile = file.path(outputFolder, "achilles.sql")) + writeLines(sprintf("All Achilles SQL scripts can be found in folder: %s", file.path(outputFolder, "achilles.sql"))) + } + + return (achillesResults) +} + +#' Create the concept hierarchy +#' +#' @details +#' Post-processing, create the concept hierarchy. +#' Please note: this table creation only requires the Vocabulary, not the CDM itself. +#' You could run this once for 1 Vocab version, and then copy the table to all CDMs using that Vocab. 
+#' +#' @param connectionDetails An R object of type \code{connectionDetails} created using the function \code{createConnectionDetails} in the \code{DatabaseConnector} package. +#' @param resultsDatabaseSchema Fully qualified name of database schema that we can write final results to. Default is cdmDatabaseSchema. +#' On SQL Server, this should specifiy both the database and the schema, so for example, on SQL Server, 'cdm_results.dbo'. +#' @param scratchDatabaseSchema Fully qualified name of the database schema that will store all of the intermediate scratch tables, so for example, on SQL Server, 'cdm_scratch.dbo'. +#' Must be accessible to/from the cdmDatabaseSchema and the resultsDatabaseSchema. Default is resultsDatabaseSchema. +#' Making this "#" will run Achilles in single-threaded mode and use temporary tables instead of permanent tables. +#' @param vocabDatabaseSchema String name of database schema that contains OMOP Vocabulary. Default is cdmDatabaseSchema. On SQL Server, this should specifiy both the database and the schema, so for example 'results.dbo'. +#' @param numThreads (OPTIONAL, multi-threaded mode) The number of threads to use to run Achilles in parallel. Default is 1 thread. +#' @param tempAchillesPrefix (OPTIONAL, multi-threaded mode) The prefix to use for the scratch Achilles analyses tables. Default is "tmpach" +#' @param sqlOnly TRUE = just generate SQL files, don't actually run, FALSE = run Achilles +#' +#' @export +createConceptHierarchy <- function(connectionDetails, + resultsDatabaseSchema, + scratchDatabaseSchema, + vocabDatabaseSchema, + numThreads = 1, + tempAchillesPrefix = "tmpach", + sqlOnly = FALSE) { + + schemaDelim <- "." + + if (numThreads == 1 || scratchDatabaseSchema == "#") { + numThreads <- 1 + scratchDatabaseSchema <- "#" + schemaDelim <- "s_" + } + + hierarchySqlFiles <- list.files(path = file.path(system.file(package = "Achilles"), + "sql", "sql_server", "post_processing", "concept_hierarchies"), + recursive = TRUE, + full.names = FALSE, + all.files = FALSE, + pattern = "\\.sql$") + + hierarchySqls <- lapply(hierarchySqlFiles, function(hierarchySqlFile) { + sql <- SqlRender::loadRenderTranslateSql(sqlFilename = file.path("post_processing", + "concept_hierarchies", + hierarchySqlFile), + packageName = "Achilles", + dbms = connectionDetails$dbms, + warnOnMissingParameters = FALSE, + scratchDatabaseSchema = scratchDatabaseSchema, + vocabDatabaseSchema = vocabDatabaseSchema, + schemaDelim = schemaDelim, + tempAchillesPrefix = tempAchillesPrefix) + }) + + mergeSql <- SqlRender::loadRenderTranslateSql(sqlFilename = file.path("post_processing", + "merge_concept_hierarchy.sql"), + packageName = "Achilles", + dbms = connectionDetails$dbms, + warnOnMissingParameters = FALSE, + resultsDatabaseSchema = resultsDatabaseSchema, + scratchDatabaseSchema = scratchDatabaseSchema, + schemaDelim = schemaDelim, + tempAchillesPrefix = tempAchillesPrefix) + + + if (!sqlOnly) { + writeLines("Executing Concept Hierarchy creation. 
This could take a while") + + if (numThreads == 1) { + connection <- DatabaseConnector::connect(connectionDetails = connectionDetails) + for (sql in hierarchySqls) { + DatabaseConnector::executeSql(connection = connection, sql = sql) + } + DatabaseConnector::executeSql(connection = connection, sql = mergeSql) + DatabaseConnector::disconnect(connection = connection) + } else { + cluster <- OhdsiRTools::makeCluster(numberOfThreads = numThreads, singleThreadToMain = TRUE) + dummy <- OhdsiRTools::clusterApply(cluster = cluster, + x = hierarchySqls, + function(sql) { + connection <- DatabaseConnector::connect(connectionDetails = connectionDetails) + DatabaseConnector::executeSql(connection = connection, sql = sql) + DatabaseConnector::disconnect(connection = connection) + }) + OhdsiRTools::stopCluster(cluster = cluster) + + connection <- DatabaseConnector::connect(connectionDetails = connectionDetails) + DatabaseConnector::executeSql(connection = connection, sql = mergeSql) + DatabaseConnector::disconnect(connection = connection) + } + + dropAllScratchTables(connectionDetails = connectionDetails, + scratchDatabaseSchema = scratchDatabaseSchema, + tempAchillesPrefix = tempAchillesPrefix, + numThreads = numThreads, + tableTypes = c("concept_hierarchy")) + + writeLines(sprintf("Done. Concept Hierarchy table can now be found in %s", resultsDatabaseSchema)) + } + + return (c(hierarchySqls, mergeSql)) +} + + +#' Create indicies +#' +#' @details +#' Post-processing, create indices to help performance. Cannot be used with Redshift. +#' +#' @param connectionDetails An R object of type \code{connectionDetails} created using the function \code{createConnectionDetails} in the \code{DatabaseConnector} package. +#' @param resultsDatabaseSchema Fully qualified name of database schema that we can write final results to. Default is cdmDatabaseSchema. +#' On SQL Server, this should specifiy both the database and the schema, so for example, on SQL Server, 'cdm_results.dbo'. +#' @param sqlOnly TRUE = just generate SQL files, don't actually run, FALSE = run Achilles +#' +#' @export +createIndices <- function(connectionDetails, + resultsDatabaseSchema, + sqlOnly = FALSE) { + + if (connectionDetails$dbms %in% c("redshift", "netezza")) { + return (sprintf("/* INDEX CREATION SKIPPED, INDICES NOT SUPPORTED IN %s */", toupper(connectionDetails$dbms))) + } + indicesSql <- SqlRender::loadRenderTranslateSql(sqlFilename = "post_processing/achilles_indices.sql", + packageName = "Achilles", + dbms = connectionDetails$dbms, + warnOnMissingParameters = FALSE, + resultsDatabaseSchema = resultsDatabaseSchema) + + if (!sqlOnly) { + connection <- DatabaseConnector::connect(connectionDetails = connectionDetails) + DatabaseConnector::executeSql(connection = connection, sql = indicesSql) + DatabaseConnector::disconnect(connection = connection) + } + + return (indicesSql) +} + + + +#' Validate the CDM schema +#' +#' @details +#' Runs a validation script to ensure the CDM is valid based on v5.x +#' +#' @param connectionDetails An R object of type \code{connectionDetails} created using the function \code{createConnectionDetails} in the \code{DatabaseConnector} package. +#' @param cdmDatabaseSchema string name of database schema that contains OMOP CDM. On SQL Server, this should specifiy both the database and the schema, so for example 'cdm_instance.dbo'. +#' @param resultsDatabaseSchema Fully qualified name of database schema that the cohort table is written to. Default is cdmDatabaseSchema. 
+#' On SQL Server, this should specifiy both the database and the schema, so for example, on SQL Server, 'cdm_results.dbo'. +#' @param cdmVersion Define the OMOP CDM version used: currently supports v5 and above. Use major release number or minor number only (e.g. 5, 5.3) +#' @param runCostAnalysis Boolean to determine if cost analysis should be run. Note: only works on CDM v5 and v5.1.0+ style cost tables. +#' @param sqlOnly TRUE = just generate SQL files, don't actually run, FALSE = run Achilles +#' +#' @export +validateSchema <- function(connectionDetails, + cdmDatabaseSchema, + resultsDatabaseSchema = cdmDatabaseSchema, + cdmVersion, + runCostAnalysis, + sqlOnly = FALSE) { + + outputFolder <- "output" + + majorVersions <- lapply(c("5", "5.1", "5.2", "5.3"), function(majorVersion) { + if (compareVersion(a = as.character(cdmVersion), b = majorVersion) >= 0) { + majorVersion + } else { + 0 + } + }) + + cdmVersion <- max(unlist(majorVersions)) + + sql <- SqlRender::loadRenderTranslateSql(sqlFilename = "validate_schema.sql", + packageName = "Achilles", + dbms = connectionDetails$dbms, + warnOnMissingParameters = FALSE, + cdmDatabaseSchema = cdmDatabaseSchema, + resultsDatabaseSchema = resultsDatabaseSchema, + runCostAnalysis = runCostAnalysis, + cdmVersion = cdmVersion) + if (sqlOnly) { + SqlRender::writeSql(sql = sql, targetFile = file.path(outputFolder, "ValidateSchema.sql")) + } else { + connection <- DatabaseConnector::connect(connectionDetails = connectionDetails) + tables <- DatabaseConnector::querySql(connection = connection, sql = sql) + writeLines("CDM Schema is valid") + DatabaseConnector::disconnect(connection = connection) + } + + return (sql) +} + +#' Get all analysis details +#' +#' @details +#' Get a list of all analyses with their analysis IDs and strata. +#' +#' @return +#' A data.frame with the analysis details. +#' +#' @export +getAnalysisDetails <- function() { + pathToCsv <- system.file("csv", "achilles", "achilles_analysis_details.csv", package = "Achilles") + analysisDetails <- read.csv(file = pathToCsv, header = TRUE, stringsAsFactors = FALSE) + return (analysisDetails) +} + +#' Drop all possible scratch tables +#' +#' @details +#' Drop all possible Achilles, Heel, and Concept Hierarchy scratch tables +#' +#' @param connectionDetails An R object of type \code{connectionDetails} created using the function \code{createConnectionDetails} in the \code{DatabaseConnector} package. +#' @param scratchDatabaseSchema string name of database schema that Achilles scratch tables were written to. +#' @param tempAchillesPrefix The prefix to use for the "temporary" (but actually permanent) Achilles analyses tables. Default is "tmpach" +#' @param tempHeelPrefix The prefix to use for the "temporary" (but actually permanent) Heel tables. Default is "tmpheel" +#' @param numThreads The number of threads to use to run this function. Default is 1 thread. 
+#' @param tableTypes The types of Achilles scratch tables to drop: achilles or heel or concept_hierarchy or all 3 +#' +#' @export +dropAllScratchTables <- function(connectionDetails, + scratchDatabaseSchema, + tempAchillesPrefix = "tmpach", + tempHeelPrefix = "tmpheel", + numThreads = 1, + tableTypes = c("achilles", "heel", "concept_hierarchy")) { + + connection <- DatabaseConnector::connect(connectionDetails = connectionDetails) + + scratchTables <- lapply(DatabaseConnector::getTableNames(connection = connection, + databaseSchema = scratchDatabaseSchema), function(t) tolower(t)) + + if ("achilles" %in% tableTypes) { + + # Drop Achilles Scratch Tables ------------------------------------------------------ + + analysisDetails <- getAnalysisDetails() + + resultsTables <- lapply(analysisDetails$ANALYSIS_ID[analysisDetails$DISTRIBUTION <= 0], function(id) { + sprintf("%s_%d", tempAchillesPrefix, id) + }) + + resultsDistTables <- lapply(analysisDetails$ANALYSIS_ID[abs(analysisDetails$DISTRIBUTION) == 1], function(id) { + sprintf("%s_dist_%d", tempAchillesPrefix, id) + }) + + dropTables <- c(Reduce(intersect, list(scratchTables, resultsTables)), + Reduce(intersect, list(scratchTables, resultsDistTables))) + + dropSqls <- lapply(dropTables, function(scratchTable) { + SqlRender::renderSql("drop table @scratchDatabaseSchema.@scratchTable;", + scratchDatabaseSchema = scratchDatabaseSchema, + scratchTable = scratchTable)$sql + }) + + cluster <- OhdsiRTools::makeCluster(numberOfThreads = numThreads, singleThreadToMain = TRUE) + dummy <- OhdsiRTools::clusterApply(cluster = cluster, + x = dropSqls, + function(sql) { + connection <- DatabaseConnector::connect(connectionDetails = connectionDetails) + DatabaseConnector::executeSql(connection = connection, sql = sql) + DatabaseConnector::disconnect(connection = connection) + }) + + OhdsiRTools::stopCluster(cluster = cluster) + } + + if ("heel" %in% tableTypes) { + # Drop Parallel Heel Scratch Tables ------------------------------------------------------ + + parallelFiles <- list.files(path = file.path(system.file(package = "Achilles"), + "sql/sql_server/heels/parallel"), + recursive = TRUE, + full.names = FALSE, + all.files = FALSE, + pattern = "\\.sql$") + + parallelHeelTables <- lapply(parallelFiles, function(t) tolower(paste(tempHeelPrefix, + trimws(tools::file_path_sans_ext(basename(t))), + sep = "_"))) + + dropTables <- Reduce(intersect, list(scratchTables, parallelHeelTables)) + + dropSqls <- lapply(dropTables, function(scratchTable) { + SqlRender::renderSql("drop table @scratchDatabaseSchema.@scratchTable;", + scratchDatabaseSchema = scratchDatabaseSchema, + scratchTable = scratchTable)$sql + }) + + cluster <- OhdsiRTools::makeCluster(numberOfThreads = numThreads, singleThreadToMain = TRUE) + dummy <- OhdsiRTools::clusterApply(cluster = cluster, + x = dropSqls, + function(sql) { + connection <- DatabaseConnector::connect(connectionDetails = connectionDetails) + DatabaseConnector::executeSql(connection = connection, sql = sql) + DatabaseConnector::disconnect(connection = connection) + }) + + OhdsiRTools::stopCluster(cluster = cluster) + } + + if ("concept_hierarchy" %in% tableTypes) { + # Drop Concept Hierarchy Tables ------------------------------------------------------ + + hierarchySqlFiles <- list.files(path = file.path(system.file(package = "Achilles"), + "sql", "sql_server", "post_processing", "concept_hierarchies"), + recursive = TRUE, + full.names = FALSE, + all.files = FALSE, + pattern = "\\.sql$") + + conceptHierarchyTables <- 
lapply(hierarchySqlFiles, function(t) tolower(paste(tempAchillesPrefix, "ch", + trimws(tools::file_path_sans_ext(basename(t))), + sep = "_"))) + dropTables <- Reduce(intersect, list(scratchTables, conceptHierarchyTables)) + + dropSqls <- lapply(dropTables, function(scratchTable) { + SqlRender::renderSql("drop table @scratchDatabaseSchema.@scratchTable;", + scratchDatabaseSchema = scratchDatabaseSchema, + scratchTable = scratchTable)$sql + }) + + cluster <- OhdsiRTools::makeCluster(numberOfThreads = numThreads, singleThreadToMain = TRUE) + dummy <- OhdsiRTools::clusterApply(cluster = cluster, + x = dropSqls, + function(sql) { + connection <- DatabaseConnector::connect(connectionDetails = connectionDetails) + DatabaseConnector::executeSql(connection = connection, sql = sql) + DatabaseConnector::disconnect(connection = connection) + }) + + OhdsiRTools::stopCluster(cluster = cluster) + } +} + +.getCdmVersion <- function(connectionDetails, cdmDatabaseSchema) { + sql <- SqlRender::renderSql(sql = "select top 1 cdm_version + from @cdmDatabaseSchema.cdm_source", + cdmDatabaseSchema = cdmDatabaseSchema)$sql + connection <- DatabaseConnector::connect(connectionDetails = connectionDetails) + cdmVersion <- tryCatch({ + c <- DatabaseConnector::querySql(connection = connection, sql = sql) + }, error = function (e) { + c <- "" + }, finally = { + DatabaseConnector::disconnect(connection = connection) + connection <- NULL + }) + + return (c) +} + +.getAnalysisSql <- function(analysisId, + connectionDetails, + schemaDelim, + scratchDatabaseSchema, + cdmDatabaseSchema, + resultsDatabaseSchema, + cdmVersion, + tempAchillesPrefix, + resultsTables, + sourceName, + numThreads) { + outputFolder <- "output" + + sql <- SqlRender::loadRenderTranslateSql(sqlFilename = file.path("analyses", paste(analysisId, "sql", sep = ".")), + packageName = "Achilles", + dbms = connectionDetails$dbms, + warnOnMissingParameters = FALSE, + scratchDatabaseSchema = scratchDatabaseSchema, + cdmDatabaseSchema = cdmDatabaseSchema, + resultsDatabaseSchema = resultsDatabaseSchema, + schemaDelim = schemaDelim, + tempAchillesPrefix = tempAchillesPrefix, + source_name = sourceName, + achilles_version = packageVersion(pkg = "Achilles"), + cdmVersion = cdmVersion, + singleThreaded = (scratchDatabaseSchema == "#")) + + return (sql) +} + +.mergeAchillesScratchTables <- function(resultsTable, + analysisIds, + createTable, + connectionDetails, + schemaDelim, + scratchDatabaseSchema, + resultsDatabaseSchema, + cdmVersion, + tempAchillesPrefix, + numThreads, + smallCellCount) { + outputFolder <- "output" + + castedNames <- apply(resultsTable$schema, 1, function(field) { + SqlRender::renderSql("cast(@fieldName as @fieldType) as @fieldName", + fieldName = field["FIELD_NAME"], + fieldType = field["FIELD_TYPE"])$sql + }) + + detailSqls <- lapply(resultsTable$analysisIds[resultsTable$analysisIds %in% analysisIds], function(analysisId) { + sql <- SqlRender::renderSql(sql = "select @castedNames from + @scratchDatabaseSchema@schemaDelim@tablePrefix_@analysisId", + scratchDatabaseSchema = scratchDatabaseSchema, + schemaDelim = schemaDelim, + castedNames = paste(castedNames, collapse = ", "), + tablePrefix = resultsTable$tablePrefix, + analysisId = analysisId)$sql + + sql <- SqlRender::translateSql(sql = sql, targetDialect = connectionDetails$dbms)$sql + }) + + sql <- SqlRender::loadRenderTranslateSql(sqlFilename = "analyses/merge_achilles_tables.sql", + packageName = "Achilles", + dbms = connectionDetails$dbms, + warnOnMissingParameters = FALSE, + createTable 
= createTable, + resultsDatabaseSchema = resultsDatabaseSchema, + detailType = resultsTable$detailType, + detailSqls = paste(detailSqls, collapse = " \nunion all\n "), + fieldNames = paste(resultsTable$schema$FIELD_NAME, collapse = ", "), + smallCellCount = smallCellCount) + + return (sql) +} + +.logMtPerformance <- function(results, outputFolder) { + newDf <- do.call("rbind", results) + logFile <- file.path(outputFolder, "achillesLog.rds") + if (file.exists(logFile)) { + oldDf <- readRDS(logFile) + newDf <- rbind(oldDf, newDf) + } + + saveRDS(object = newDf, file = logFile) +} + +.is_installed <- function(pkg, version = 0) { + installed_version <- tryCatch(utils::packageVersion(pkg), error = function(e) NA) + !is.na(installed_version) && installed_version >= version +} + \ No newline at end of file diff --git a/R/AchillesHeel.R b/R/AchillesHeel.R new file mode 100755 index 00000000..e1b113b5 --- /dev/null +++ b/R/AchillesHeel.R @@ -0,0 +1,383 @@ +# @file AchillesHeel +# +# Copyright 2018 Observational Health Data Sciences and Informatics +# +# This file is part of Achilles +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# @author Observational Health Data Sciences and Informatics +# @author Martijn Schuemie +# @author Patrick Ryan +# @author Vojtech Huser +# @author Chris Knoll +# @author Ajit Londhe + + + +#' Execution of data quality rules (for v5 and above) +#' +#' @description +#' \code{achillesHeel} executes data quality rules (or checks) on pre-computed analyses (or measures). +#' +#' @details +#' \code{achillesHeel} contains number of rules (authored in SQL) that are executed against achilles results tables. +#' +#' @param connectionDetails An R object of type \code{connectionDetails} created using the function \code{createConnectionDetails} in the \code{DatabaseConnector} package. +#' @param cdmDatabaseSchema string name of database schema that contains OMOP CDM. On SQL Server, this should specifiy both the database and the schema, so for example 'cdm_instance.dbo'. +#' @param resultsDatabaseSchema string name of database schema that we can write final results to. Default is cdmDatabaseSchema. On SQL Server, this should specifiy both the database and the schema, +#' so for example 'results.dbo'. +#' @param scratchDatabaseSchema (OPTIONAL, multi-threaded mode) Name of a fully qualified schema that is accessible to/from the resultsDatabaseSchema, that can store all of the scratch tables. Default is resultsDatabaseSchema. +#' @param cdmVersion Define the OMOP CDM version used: currently supports v5 and above. Default = "5". +#' @param numThreads (OPTIONAL, multi-threaded mode) The number of threads to use to run Achilles in parallel. Default is 1 thread. +#' @param tempHeelPrefix (OPTIONAL, multi-threaded mode) The prefix to use for the "temporary" (but actually permanent) Heel tables. 
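Because the multi-threaded parameters documented here drive the same cluster pattern used throughout this file, here is that pattern in isolation as a review aid; the two SQL strings and the connection details are stand-ins only, not queries from this changeset.

# Each worker opens its own short-lived connection, runs one statement, and disconnects.
connectionDetails <- DatabaseConnector::createConnectionDetails(dbms = "postgresql",
                                                                server = "localhost/ohdsi")
sqls <- c("select 1;", "select 2;")   # stand-ins for the rendered Heel queries
cluster <- OhdsiRTools::makeCluster(numberOfThreads = 2, singleThreadToMain = TRUE)
dummy <- OhdsiRTools::clusterApply(cluster = cluster,
                                   x = sqls,
                                   function(sql) {
                                     connection <- DatabaseConnector::connect(connectionDetails = connectionDetails)
                                     DatabaseConnector::executeSql(connection = connection, sql = sql)
                                     DatabaseConnector::disconnect(connection = connection)
                                   })
OhdsiRTools::stopCluster(cluster = cluster)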
Default is "tmpheel" +#' @param dropScratchTables (OPTIONAL, multi-threaded mode) TRUE = drop the scratch tables (may take time depending on dbms), FALSE = leave them in place +#' @param ThresholdAgeWarning The maximum age to allow in Heel +#' @param ThresholdOutpatientVisitPerc The maximum percentage of outpatient visits among all visits +#' @param ThresholdMinimalPtMeasDxRx The minimum percentage of patients with at least 1 Measurement, 1 Dx, and 1 Rx +#' @param sqlOnly Boolean to determine if Heel should be fully executed. TRUE = just generate SQL files, don't actually run, FALSE = run Achilles Heel +#' @param outputFolder (OPTIONAL, sql only mode) Path to store SQL files +#' +#' @return nothing is returned +#' @examples \dontrun{ +#' connectionDetails <- createConnectionDetails(dbms="sql server", server="some_server") +#' achillesHeel <- achillesHeel(connectionDetails = connectionDetails, +#' cdmDatabaseSchema = "cdm", +#' resultsDatabaseSchema = "results", +#' scratchDatabaseSchema = "scratch", +#' cdmVersion = "5.3.0", +#' numThreads = 10) +#' } +#' @export +achillesHeel <- function(connectionDetails, + cdmDatabaseSchema, + resultsDatabaseSchema = cdmDatabaseSchema, + scratchDatabaseSchema = resultsDatabaseSchema, + cdmVersion = "5", + numThreads = 1, + tempHeelPrefix = "tmpheel", + dropScratchTables = FALSE, + ThresholdAgeWarning = 125, + ThresholdOutpatientVisitPerc = 0.43, + ThresholdMinimalPtMeasDxRx = 20.5, + outputFolder = "output", + sqlOnly = FALSE) { + + # Try to get CDM Version if not provided ---------------------------------------------------------------------------------------- + + if (missing(cdmVersion)) { + cdmVersion <- .getCdmVersion(connectionDetails, cdmDatabaseSchema) + } + + # Check CDM version is valid --------------------------------------------------------------------------------------------------- + + if (compareVersion(a = cdmVersion, b = "5") < 0) { + stop("Error: Invalid CDM Version number; this function is only for v5 and above. + See Achilles Git Repo to find v4 compatible version of Achilles.") + } + + # Establish folder paths -------------------------------------------------------------------------------------------------------- + + if (sqlOnly & !dir.exists(outputFolder)) { + dir.create(path = outputFolder, recursive = TRUE) + } + + heelSql <- c() + + # Initialize thread and scratchDatabaseSchema settings ---------------------------------------------------------------- + + schemaDelim <- "." + + if (numThreads == 1 || scratchDatabaseSchema == "#") { + numThreads <- 1 + scratchDatabaseSchema <- "#" + schemaDelim <- "s_" + # first invocation of the connection, to persist throughout to maintain temp tables + connection <- DatabaseConnector::connect(connectionDetails = connectionDetails) + } else { + if (!.is_installed("OhdsiRTools")) { + writeLines("Installing OhdsiRTools for multi-threading support") + devtools::install_github("OHDSI/OhdsiRTools") + } + } + + if (!sqlOnly) { + writeLines("Executing Achilles Heel. 
This could take a while") + } + + # Clean up existing scratch tables ----------------------------------------------- + + if (numThreads > 1 & !sqlOnly) { + # Drop the scratch tables + writeLines(sprintf("Dropping scratch Heel tables from schema %s", scratchDatabaseSchema)) + + dropAllScratchTables(connectionDetails = connectionDetails, + scratchDatabaseSchema = scratchDatabaseSchema, + tempAchillesPrefix = tempAchillesPrefix, + numThreads = numThreads, + tableTypes = c("heel")) + + writeLines(sprintf("Temporary Heel tables removed from schema %s", scratchDatabaseSchema)) + } + + # Generate parallel Heels --------------------------------------------------------------------------------------------------------- + + parallelFiles <- list.files(path = file.path(system.file(package = "Achilles"), + "sql", "sql_server", "heels", "parallel"), + recursive = TRUE, + full.names = TRUE, + all.files = FALSE, + pattern = "\\.sql$") + + parallelSqls <- lapply(parallelFiles, function(parallelFile) { + .getHeelSql(heelFile = parallelFile, + connectionDetails = connectionDetails, + cdmDatabaseSchema = cdmDatabaseSchema, + resultsDatabaseSchema = resultsDatabaseSchema, + scratchDatabaseSchema = scratchDatabaseSchema, + schemaDelim = schemaDelim, + tempHeelPrefix = tempHeelPrefix, + numThreads = numThreads, + outputFolder = outputFolder) + }) + + heelSql <- c(heelSql, parallelSqls) + + if (!sqlOnly) { + if (numThreads == 1) { + for (sql in parallelSqls) { + DatabaseConnector::executeSql(connection = connection, sql = sql) + } + } else { + cluster <- OhdsiRTools::makeCluster(numberOfThreads = numThreads, singleThreadToMain = TRUE) + dummy <- OhdsiRTools::clusterApply(cluster = cluster, + x = parallelSqls, + function(sql) { + connection <- DatabaseConnector::connect(connectionDetails = connectionDetails) + DatabaseConnector::executeSql(connection = connection, sql = sql) + DatabaseConnector::disconnect(connection = connection) + }) + OhdsiRTools::stopCluster(cluster = cluster) + } + } + + # Merge scratch Heel tables into staging tables ---------------------------------------- + + isDerived <- sapply(parallelFiles, function(parallelFile) { grepl(pattern = "derived", parallelFile) }) + + derivedSqls <- lapply(parallelFiles[isDerived], function(parallelFile) { + SqlRender::renderSql(sql = + "select + cast(analysis_id as int) as analysis_id, + cast(stratum_1 as varchar(255)) as stratum_1, + cast(stratum_2 as varchar(255)) as stratum_2, + cast(statistic_value as float) as statistic_value, + cast(measure_id as varchar(255)) as measure_id + from @scratchDatabaseSchema@schemaDelim@tempHeelPrefix_@heelName", + scratchDatabaseSchema = scratchDatabaseSchema, + schemaDelim = ifelse(scratchDatabaseSchema == "#", "s_", "."), + tempHeelPrefix = tempHeelPrefix, + heelName = gsub(pattern = ".sql", replacement = "", x = basename(parallelFile)))$sql + }) + + derivedSql <- SqlRender::loadRenderTranslateSql(sqlFilename = "heels/merge_derived.sql", + packageName = "Achilles", + dbms = connectionDetails$dbms, + warnOnMissingParameters = FALSE, + schema = "#", + schemaDelim = "", + destination = "achilles_rd_0", + derivedSqls = paste(derivedSqls, collapse = " \nunion all\n ")) + + resultSqls <- lapply(X = parallelFiles[!isDerived], function(parallelFile) { + SqlRender::renderSql(sql = + "select + cast(analysis_id as int) as analysis_id, + cast(ACHILLES_HEEL_warning as varchar(255)) as ACHILLES_HEEL_warning, + cast(rule_id as int) as rule_id, + cast(record_count as bigint) as record_count + from 
@scratchDatabaseSchema@schemaDelim@tempHeelPrefix_@heelName", + scratchDatabaseSchema = scratchDatabaseSchema, + schemaDelim = schemaDelim, + tempHeelPrefix = tempHeelPrefix, + heelName = gsub(pattern = ".sql", replacement = "", x = basename(parallelFile)))$sql + }) + + resultSql <- SqlRender::loadRenderTranslateSql(sqlFilename = "heels/merge_heel_results.sql", + packageName = "Achilles", + dbms = connectionDetails$dbms, + warnOnMissingParameters = FALSE, + schema = "#", + schemaDelim = "", + destination = "achilles_hr_0", + resultSqls = paste(resultSqls, collapse = " \nunion all\n ")) + + heelSql <- c(heelSql, derivedSql, resultSql) + + if (!sqlOnly) { + if (numThreads > 1) { + connection <- DatabaseConnector::connect(connectionDetails = connectionDetails) + } + for (sql in c(derivedSql, resultSql)) { + DatabaseConnector::executeSql(connection = connection, sql = sql) + } + } + + # Run serial queries to finish up --------------------------------------------------- + + serialFiles <- read.csv(file = system.file("csv", "heel", "heel_rules_all.csv", package = "Achilles"), + header = TRUE, stringsAsFactors = FALSE) + + serialFiles <- serialFiles[serialFiles$execution_type == "serial", ] + + for (i in 1:nrow(serialFiles)) { + row <- serialFiles[i,] + newId <- rdOldId <- hrOldId <- as.integer(row$rule_id) + + if (i > 1) { + rdOldId = as.integer(max(serialFiles$rule_id[serialFiles$destination_table %in% c("results_derived", "both") & + serialFiles$rule_id < newId])) + hrOldId = as.integer(max(serialFiles$rule_id[serialFiles$destination_table %in% c("heel_results", "both") & + serialFiles$rule_id < newId])) + } + + serialSql <- SqlRender::loadRenderTranslateSql(sqlFilename = sprintf("heels/serial/rule_%d.sql", + as.integer(row$rule_id)), + packageName = "Achilles", + dbms = connectionDetails$dbms, + warnOnMissingParameters = FALSE, + resultsDatabaseSchema = resultsDatabaseSchema, + rdOldId = rdOldId, + hrOldId = hrOldId, + rdNewId = newId, + hrNewId = newId, + ThresholdAgeWarning = ThresholdAgeWarning, + ThresholdOutpatientVisitPerc = ThresholdOutpatientVisitPerc, + ThresholdMinimalPtMeasDxRx = ThresholdMinimalPtMeasDxRx) + + if (row$destination_table == "results_derived") { + drops <- c(sprintf("rd_%d", rdOldId)) + } else if (row$destination_table == "heel_results") { + drops <- c(sprintf("hr_%d", hrOldId)) + } else { + drops <- c(sprintf("rd_%d", rdOldId), sprintf("hr_%d", hrOldId)) + } + + sqlDropPrior <- "" + + if (i > 1) { + sqlDropPriors <- lapply(drops, function(drop) { + sql <- SqlRender::renderSql(sql = "IF OBJECT_ID('tempdb..#@table', 'U') IS NOT NULL DROP TABLE #@table;", + table = sprintf("serial_%2s", drop))$sql + sql <- SqlRender::translateSql(sql = sql, targetDialect = connectionDetails$dbms)$sql + }) + sqlDropPrior <- paste(sqlDropPriors, collapse = "\n\n") + } + + sql <- paste(serialSql, sqlDropPrior, sep = "\n\n") + + heelSql <- c(heelSql, sql) + + if (!sqlOnly) { + DatabaseConnector::executeSql(connection = connection, sql = sql) + } + } + + # Create final Heel Tables --------------------------------------------------- + + rdId = as.integer(max(serialFiles$rule_id[serialFiles$destination_table %in% c("results_derived", "both")])) + hrId = as.integer(max(serialFiles$rule_id[serialFiles$destination_table %in% c("heel_results", "both")])) + + sqlRd <- SqlRender::loadRenderTranslateSql(sqlFilename = "heels/merge_derived.sql", + packageName = "Achilles", + dbms = connectionDetails$dbms, + warnOnMissingParameters = FALSE, + schema = resultsDatabaseSchema, + schemaDelim = ".", + 
destination = "achilles_results_derived", + derivedSqls = sprintf("select * from #serial_rd_%d", + rdId)) + + sqlHr <- SqlRender::loadRenderTranslateSql(sqlFilename = "heels/merge_heel_results.sql", + packageName = "Achilles", + dbms = connectionDetails$dbms, + warnOnMissingParameters = FALSE, + schema = resultsDatabaseSchema, + schemaDelim = ".", + destination = "achilles_heel_results", + resultSqls = sprintf("select * from #serial_hr_%d", + hrId)) + + finalSqls <- c(sqlRd, sqlHr) + heelSql <- c(heelSql, finalSqls) + + if (!sqlOnly) { + for (sql in finalSqls) { + DatabaseConnector::executeSql(connection = connection, sql = sql) + } + } + + + # Clean up scratch parallel tables ----------------------------------------------- + + if (numThreads > 1 & !sqlOnly) { + # Drop the scratch tables + writeLines(sprintf("Dropping scratch Heel tables from schema %s", scratchDatabaseSchema)) + + dropAllScratchTables(connectionDetails = connectionDetails, + scratchDatabaseSchema = scratchDatabaseSchema, + tempAchillesPrefix = tempAchillesPrefix, + numThreads = numThreads, + tableTypes = c("heel")) + + writeLines(sprintf("Temporary Heel tables removed from schema %s", scratchDatabaseSchema)) + } + + heelSql <- paste(heelSql, collapse = "\n\n") + + if (sqlOnly) { + SqlRender::writeSql(sql = heelSql, targetFile = file.path(outputFolder, "achillesHeel.sql")) + writeLines(sprintf("All Achilles SQL scripts can be found in folder: %s", file.path(outputFolder, "achillesHeel.sql"))) + } else { + writeLines(sprintf("Done. Achilles Heel results can now be found in %s", resultsDatabaseSchema)) + } + + return (heelSql) +} + +.getHeelSql <- function(heelFile, + connectionDetails, + cdmDatabaseSchema, + resultsDatabaseSchema, + scratchDatabaseSchema, + schemaDelim, + tempHeelPrefix, + numThreads, + outputFolder) { + + sql <- SqlRender::loadRenderTranslateSql(sqlFilename = gsub(pattern = + file.path(system.file(package = "Achilles"), + "sql/sql_server/"), + replacement = "", x = heelFile), + packageName = "Achilles", + dbms = connectionDetails$dbms, + warnOnMissingParameters = FALSE, + cdmDatabaseSchema = cdmDatabaseSchema, + resultsDatabaseSchema = resultsDatabaseSchema, + scratchDatabaseSchema = scratchDatabaseSchema, + schemaDelim = schemaDelim, + tempHeelPrefix = tempHeelPrefix, + heelName = gsub(pattern = ".sql", replacement = "", x = basename(heelFile))) + + return (sql) +} diff --git a/R/AchillesViewResults.R b/R/AchillesViewResults.R old mode 100644 new mode 100755 index 6ba3097c..c3b8481f --- a/R/AchillesViewResults.R +++ b/R/AchillesViewResults.R @@ -1,88 +1,98 @@ -#' @title fetchAchillesHeelResults -#' -#' @description -#' \code{fetchAchillesHeelResults} retrieves the AchillesHeel results for the AChilles analysis to identify potential data quality issues. -#' -#' @details -#' AchillesHeel is a part of the Achilles analysis aimed at identifying potential data quality issues. It will list errors (things -#' that should really be fixed) and warnings (things that should at least be investigated). -#' -#' @param connectionDetails An R object of type ConnectionDetail (details for the function that contains server info, database type, optionally username/password, port) -#' @param resultsDatabase Name of database containing the Achilles descriptive statistics. 
-#' -#' @return A table listing all identified issues -#' @examples \dontrun{ -#' connectionDetails <- DatabaseConnector::createConnectionDetails(dbms="sql server", server="myserver") -#' achillesResults <- achilles(connectionDetails, "cdm4_sim", "scratch", "TestDB") -#' fetchAchillesHeelResults(connectionDetails, "scratch") -#' } -#' @export -fetchAchillesHeelResults <- function (connectionDetails, resultsDatabase){ - connectionDetails$schema = resultsDatabase - conn <- DatabaseConnector::connect(connectionDetails) - - sql <- "SELECT * FROM ACHILLES_HEEL_results" - issues <- DatabaseConnector::querySql(conn,sql) - - DatabaseConnector::disconnect(conn) - - issues -} - -#' @title fetchAchillesAnalysisResults -#' -#' @description -#' \code{fetchAchillesAnalysisResults} returns the results for one Achilles analysis Id. -#' -#' @details -#' See \code{data(analysesDetails)} for a list of all Achilles analyses and their Ids. -#' -#' @param connectionDetails An R object of type ConnectionDetail (details for the function that contains server info, database type, optionally username/password, port) -#' @param resultsDatabase Name of database containing the Achilles descriptive statistics. -#' @param analysisId A single analysisId -#' -#' @return An object of type \code{achillesAnalysisResults} -#' @examples \dontrun{ -#' connectionDetails <- DatabaseConnector::createConnectionDetails(dbms="sql server", server="myserver") -#' achillesResults <- achilles(connectionDetails, "cdm4_sim", "scratch", "TestDB") -#' fetchAchillesAnalysisResults(connectionDetails, "scratch",106) -#' } -#' @export -fetchAchillesAnalysisResults <- function (connectionDetails, resultsDatabase, analysisId){ - connectionDetails$schema = resultsDatabase - conn <- DatabaseConnector::connect(connectionDetails) - - sql <- "SELECT * FROM ACHILLES_analysis WHERE analysis_id = @analysisId" - sql <- SqlRender::renderSql(sql,analysisId = analysisId)$sql - analysisDetails <- DatabaseConnector::querySql(conn,sql) - - sql <- "SELECT * FROM ACHILLES_results WHERE analysis_id = @analysisId" - sql <- SqlRender::renderSql(sql,analysisId = analysisId)$sql - analysisResults <- DatabaseConnector::querySql(conn,sql) - - if (nrow(analysisResults) == 0){ - sql <- "SELECT * FROM ACHILLES_results_dist WHERE analysis_id = @analysisId" - sql <- SqlRender::renderSql(sql,analysisId = analysisId)$sql - analysisResults <- DatabaseConnector::querySql(conn,sql) - } - - colnames(analysisDetails) <- toupper(colnames(analysisDetails)) - colnames(analysisResults) <- toupper(colnames(analysisResults)) - - for (i in 1:5){ - stratumName <- analysisDetails[,paste("STRATUM",i,"NAME",sep="_")] - if (is.na(stratumName)){ - analysisResults[,paste("STRATUM",i,sep="_")] <- NULL - } else { - colnames(analysisResults)[colnames(analysisResults) == paste("STRATUM",i,sep="_")] <- toupper(stratumName) - } - } - - DatabaseConnector::disconnect(conn) - - result <- list(analysisId = analysisId, - analysisName = analysisDetails$ANALYSIS_NAME, - analysisResults = analysisResults) - class(result) <- "achillesAnalysisResults" - result -} +#' @title fetchAchillesHeelResults +#' +#' @description +#' \code{fetchAchillesHeelResults} retrieves the AchillesHeel results for the AChilles analysis to identify potential data quality issues. +#' +#' @details +#' AchillesHeel is a part of the Achilles analysis aimed at identifying potential data quality issues. It will list errors (things +#' that should really be fixed) and warnings (things that should at least be investigated). 
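Since the second argument is renamed from resultsDatabase to resultsDatabaseSchema in this hunk, a small end-to-end sketch of running Heel and then pulling its findings may help; the server and schema names are illustrative placeholders.

library(Achilles)
connectionDetails <- DatabaseConnector::createConnectionDetails(dbms = "sql server",
                                                                server = "myserver")
# Run the Heel rules (single-threaded defaults), then retrieve the warnings/errors.
achillesHeel(connectionDetails = connectionDetails,
             cdmDatabaseSchema = "cdm",
             resultsDatabaseSchema = "results",
             cdmVersion = "5.3")
issues <- fetchAchillesHeelResults(connectionDetails = connectionDetails,
                                   resultsDatabaseSchema = "results")
head(issues)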
+#' +#' @param connectionDetails An R object of type \code{connectionDetails} created using the function \code{createConnectionDetails} in the \code{DatabaseConnector} package. +#' @param resultsDatabaseSchema Fully qualified name of database schema that we can fetch final results from. +#' On SQL Server, this should specifiy both the database and the schema, so for example, on SQL Server, 'cdm_results.dbo'. +#' +#' @return A table listing all identified issues +#' @examples \dontrun{ +#' connectionDetails <- DatabaseConnector::createConnectionDetails(dbms="sql server", server="myserver") +#' achillesResults <- achilles(connectionDetails, "cdm5_sim", "scratch", "TestDB") +#' fetchAchillesHeelResults(connectionDetails, "scratch") +#' } +#' @export +fetchAchillesHeelResults <- function (connectionDetails, + resultsDatabaseSchema) { + connection <- DatabaseConnector::connect(connectionDetails = connectionDetails) + sql <- SqlRender::renderSql(sql = "SELECT * FROM @resultsDatabaseSchema.achilles_heel_results", + resultsDatabaseSchema = resultsDatabaseSchema)$sql + + issues <- DatabaseConnector::querySql(connection = connection, sql = sql) + DatabaseConnector::disconnect(connection = connection) + + return (issues) +} + +#' @title fetchAchillesAnalysisResults +#' +#' @description +#' \code{fetchAchillesAnalysisResults} returns the results for one Achilles analysis Id. +#' +#' @details +#' See \code{data(analysesDetails)} for a list of all Achilles analyses and their Ids. +#' +#' @param connectionDetails An R object of type \code{connectionDetails} created using the function \code{createConnectionDetails} in the \code{DatabaseConnector} package. +#' @param resultsDatabaseSchema Fully qualified name of database schema that we can fetch final results from. +#' On SQL Server, this should specifiy both the database and the schema, so for example, on SQL Server, 'cdm_results.dbo'. 
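As an illustration of the list this function returns at its end (analysisId, analysisName, analysisResults), one possible way to inspect a single analysis; the server, schema name, and analysis id 106 mirror the existing roxygen example and are placeholders.

library(Achilles)
connectionDetails <- DatabaseConnector::createConnectionDetails(dbms = "sql server",
                                                                server = "myserver")
result <- fetchAchillesAnalysisResults(connectionDetails = connectionDetails,
                                       resultsDatabaseSchema = "results",
                                       analysisId = 106)
result$analysisName            # name of analysis 106
head(result$analysisResults)   # stratum columns are renamed to their stratum names (or dropped if unused)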
+#' @param analysisId A single analysisId +#' +#' @return An object of type \code{achillesAnalysisResults} +#' @examples \dontrun{ +#' connectionDetails <- DatabaseConnector::createConnectionDetails(dbms="sql server", server="myserver") +#' achillesResults <- achilles(connectionDetails, "cdm4_sim", "scratch", "TestDB") +#' fetchAchillesAnalysisResults(connectionDetails, "scratch",106) +#' } +#' @export +fetchAchillesAnalysisResults <- function (connectionDetails, + resultsDatabaseSchema, + analysisId) { + connection <- DatabaseConnector::connect(connectionDetails = connectionDetails) + + sql <- "SELECT * FROM @resultsDatabaseSchema.ACHILLES_analysis WHERE analysis_id = @analysisId" + sql <- SqlRender::renderSql(sql = sql, + resultsDatabaseSchema = resultsDatabaseSchema, + analysisId = analysisId)$sql + analysisDetails <- DatabaseConnector::querySql(connection = connection, sql = sql) + + sql <- "SELECT * FROM @resultsDatabaseSchema.ACHILLES_results WHERE analysis_id = @analysisId" + sql <- SqlRender::renderSql(sql = sql, + resultsDatabaseSchema = resultsDatabaseSchema, + analysisId = analysisId)$sql + analysisResults <- DatabaseConnector::querySql(connection = connection, sql = sql) + + if (nrow(analysisResults) == 0){ + sql <- "SELECT * FROM @resultsDatabaseSchema.ACHILLES_results_dist WHERE analysis_id = @analysisId" + sql <- SqlRender::renderSql(sql = sql, + resultsDatabaseSchema = resultsDatabaseSchema, + analysisId = analysisId)$sql + analysisResults <- DatabaseConnector::querySql(connection = connection, sql = sql) + } + + colnames(analysisDetails) <- toupper(colnames(analysisDetails)) + colnames(analysisResults) <- toupper(colnames(analysisResults)) + + for (i in 1:5) { + stratumName <- analysisDetails[, paste("STRATUM", i, "NAME", sep="_")] + if (is.na(stratumName)){ + analysisResults[,paste("STRATUM", i, sep = "_")] <- NULL + } else { + colnames(analysisResults)[colnames(analysisResults) == paste("STRATUM", i, sep = "_")] <- toupper(stratumName) + } + } + + DatabaseConnector::disconnect(connection = connection) + + result <- list(analysisId = analysisId, + analysisName = analysisDetails$ANALYSIS_NAME, + analysisResults = analysisResults) + + class(result) <- "achillesAnalysisResults" + return (result) +} diff --git a/R/TestAchillesCode.R b/R/TestAchillesCode.R deleted file mode 100644 index c004722a..00000000 --- a/R/TestAchillesCode.R +++ /dev/null @@ -1,147 +0,0 @@ -# some test-code - -testAchillesCode <- function(){ - pw <- "" - #sqlServerServer <- "myserver" - #sqlServerResultsSchema <- "scratch" - #schema <- "cdm4_sim" - - sqlServerServer <- "myserver" - - sqlServerResultsSchema <- "my_cdm.dbo" - schema <- "my_cdm.dbo" - cdmVersion <- "4" - - sqlServerResultsSchema <- "my_cdm_v5.dbo" - schema <- "my_cdm_v5.dbo" - cdmVersion <- "5" - - #Test on SQL Server: - setwd("c:/temp") - connectionDetailsSqlServer <- DatabaseConnector::createConnectionDetails(dbms="sql server", server=sqlServerServer) - achillesResultsSqlServer <- achilles(connectionDetailsSqlServer, cdmDatabaseSchema=schema, resultsDatabaseSchema=sqlServerResultsSchema,cdmVersion=cdmVersion) - - - - sqlServerResultsSchema <- "my_cdm" - schema <- "my_cdm" - cdmVersion <- "4" - - schema <- "my_cdm_v5" - cdmVersion <- "5" - - #Test on PostgreSQL - setwd("c:/temp") - connectionDetailsPostgreSql <- DatabaseConnector::createConnectionDetails(dbms="postgresql", server="localhost/ohdsi", user="postgres",password=pw) - achillesResultsPostgreSql <- achilles(connectionDetailsPostgreSql, cdmDatabaseSchema=schema, 
resultsDatabaseSchema="scratch",cdmVersion=cdmVersion) - - #Test on Oracle - setwd("c:/temp") - connectionDetailsOracle <- DatabaseConnector::createConnectionDetails(dbms="oracle", server="xe", user="system",password="OHDSI2") - achillesResultsOracle <- achilles(connectionDetailsOracle, cdmDatabaseSchema=schema, oracleTempSchema = "temp", resultsDatabaseSchema="scratch",cdmVersion=cdmVersion) - - - #Compare results: - compareResults <- function(connection1, connection2){ - data(analysesDetails) - writeLines("Comparing results table") - for (analysis_id in analysesDetails$ANALYSIS_ID){ - x <- dbGetQuery(connection1,paste("SELECT * FROM ACHILLES_results WHERE analysis_id =",analysis_id)) - if (nrow(x) > 0){ - colnames(x) <- toupper(colnames(x)) - x <- x[with(x,order(STRATUM_1,STRATUM_2,STRATUM_3,STRATUM_4,STRATUM_5)),] - x[is.na(x)] <- "" - } - y <- dbGetQuery(connection2,paste("SELECT * FROM ACHILLES_results WHERE analysis_id =",analysis_id)) - if (nrow(y) > 0){ - colnames(y) <- toupper(colnames(y)) - y <- y[with(y,order(STRATUM_1,STRATUM_2,STRATUM_3,STRATUM_4,STRATUM_5)),] - y[is.na(y)] <- "" - } - if (!(nrow(x) == 0 && nrow(y) == 0)){ - x <- round(signif(x[sapply(x,FUN=is.numeric)],5),5) - y <- round(signif(y[sapply(y,FUN=is.numeric)],5),5) - if (nrow(x) != nrow(y)){ - writeLines(paste("Difference detected for analysisId",analysis_id)) - } else if (min(x==y) == 0){ - writeLines(paste("Difference detected for analysisId",analysis_id)) - if (analysis_id %in% c(818)){ - writeLines("(This was expected)") - }else { - count = 0 - for (r in 1:nrow(x)){ - if (min(x[r,] == y[r,]) == 0){ - col <- which(x[r,] != y[r,]) - writeLines(paste("Difference in",colnames(x)[col],":",x[r,col],"versus",y[r,col])) - count = count + 1 - if (count == 10){ - writeLines("...") - break; - } - } - } - } - } - } - } - - writeLines("Comparing results_dist table") - for (analysis_id in analysesDetails$ANALYSIS_ID){ - x <- dbGetQuery(connection1,paste("SELECT * FROM ACHILLES_results_dist WHERE analysis_id =",analysis_id)) - if (nrow(x) > 0){ - colnames(x) <- toupper(colnames(x)) - x <- x[with(x,order(STRATUM_1,STRATUM_2,STRATUM_3,STRATUM_4,STRATUM_5)),] - x[is.na(x)] <- "" - } - - y <- dbGetQuery(connection2,paste("SELECT * FROM ACHILLES_results_dist WHERE analysis_id =",analysis_id)) - if (nrow(y) > 0){ - colnames(y) <- toupper(colnames(y)) - y <- y[with(y,order(STRATUM_1,STRATUM_2,STRATUM_3,STRATUM_4,STRATUM_5)),] - y[is.na(y)] <- "" - } - if (!(nrow(x) == 0 && nrow(y) == 0)){ - #STRATUM_1 <- y$STRATUM_1 - x <- round(signif(x[sapply(x,FUN=is.numeric)],5),5) - y <- round(signif(y[sapply(y,FUN=is.numeric)],5),5) - if (nrow(x) != nrow(y)){ - writeLines(paste("Difference detected for analysisId",analysis_id)) - } else if (min(x==y) == 0){ - writeLines(paste("Difference detected for analysisId",analysis_id)) - count = 0 - for (r in 1:nrow(x)){ - if (min(x[r,] == y[r,]) == 0){ - col <- which(x[r,] != y[r,]) - #writeLines(paste("Difference in",colnames(x)[col],":",x[r,col],"versus",y[r,col]," (STRATUM_1:",STRATUM_1[r],")")) - writeLines(paste("Difference in",colnames(x)[col],":",x[r,col],"versus",y[r,col])) - count = count + 1 - if (count == 10){ - writeLines("...") - break; - } - } - } - } - } - } - } - - #Compare Sql Server and Postgres: - connectionDetailsSqlServer$schema = sqlServerResultsSchema - connSqlServer <- DatabaseConnector::connect(connectionDetailsSqlServer) - - connectionDetailsPostgreSql$schema = "scratch" - connPostgreSql <- DatabaseConnector::connect(connectionDetailsPostgreSql) - - 
compareResults(connSqlServer,connPostgreSql) - - #Compare Sql Server and Oracle: - connectionDetailsSqlServer$schema = sqlServerResultsSchema - connSqlServer <- DatabaseConnector::connect(connectionDetailsSqlServer) - - connectionDetailsOracle$schema = "scratch" - connOracle <- DatabaseConnector::connect(connectionDetailsOracle) - - compareResults(connOracle,connSqlServer) - #Note: differences will be found for 1411,1412 because of reverse sorting of dates due to different formats -} \ No newline at end of file diff --git a/R/TestAchillesViewResultsCode.R b/R/TestAchillesViewResultsCode.R deleted file mode 100644 index da4d89ee..00000000 --- a/R/TestAchillesViewResultsCode.R +++ /dev/null @@ -1,56 +0,0 @@ -#Requires that Achilles has been run first - -testAchillesViestResultsCode <- function(){ - #Test on SQL Server: - setwd("c:/temp") - connectionDetailsSqlServer <- DatabaseConnector::createConnectionDetails(dbms="sql server", server="myserver") - fetchAchillesHeelResults(connectionDetailsSqlServer, resultsDatabase="my_cdm") - fetchAchillesAnalysisResults(connectionDetailsSqlServer, resultsDatabase = "my_cdm", analysisId = 106) - - - pw <- "" - - ### Test Achilles heel part ### - - #Test on SQL Server - setwd("c:/temp") - connectionDetails <- DatabaseConnector::createConnectionDetails(dbms="sql server", server="myserver") - fetchAchillesHeelResults(connectionDetails, resultsDatabase = "scratch") - - #Test on PostgreSQL - setwd("c:/temp") - connectionDetails <- DatabaseConnector::createConnectionDetails(dbms="postgresql", server="localhost/ohdsi", user="postgres",password=pw) - fetchAchillesHeelResults(connectionDetails, resultsDatabase = "scratch") - - - - #Test on Oracle - setwd("c:/temp") - connectionDetails <- DatabaseConnector::createConnectionDetails(dbms="oracle", server="xe", user="system",password="OHDSI2") - fetchAchillesHeelResults(connectionDetails, resultsDatabase = "scratch") - - - ### Test Achilles analysis results view part ### - #Test on SQL Server - setwd("c:/temp") - connectionDetails <- DatabaseConnector::createConnectionDetails(dbms="sql server", server="myserver") - fetchAchillesAnalysisResults(connectionDetails, resultsDatabase = "scratch", analysisId = 106) - - #Test on PostgreSQL - setwd("c:/temp") - connectionDetails <- DatabaseConnector::createConnectionDetails(dbms="postgresql", server="localhost/ohdsi", user="postgres",password=pw) - fetchAchillesAnalysisResults(connectionDetails, resultsDatabase = "scratch", analysisId = 106) - - - - #Test on Oracle - setwd("c:/temp") - connectionDetails <- DatabaseConnector::createConnectionDetails(dbms="oracle", server="xe", user="system",password="OHDSI2") - fetchAchillesAnalysisResults(connectionDetails, resultsDatabase = "scratch", analysisId = 106) - - - connectionDetails <- DatabaseConnector::createConnectionDetails(dbms="oracle", server="xe", user="system",password=pw) - for (analysisId in analysesDetails$ANALYSIS_ID){ - results <- fetchAchillesAnalysisResults(connectionDetails, resultsDatabase = "scratch", analysisId = analysisId) - } -} \ No newline at end of file diff --git a/R/TestExportCode.R b/R/TestExportCode.R deleted file mode 100644 index c03df7df..00000000 --- a/R/TestExportCode.R +++ /dev/null @@ -1,76 +0,0 @@ -# some more test-code - -testExportCode <- function(){ - pw <- "" - #sqlServerServer <- "myserver" - #sqlServerresultsDatabaseSchema <- "scratch" - #schema <- "cdm4_sim" - - sqlServerServer <- "myserver" - - sqlServerresultsDatabaseSchema <- "cdm_truven_ccae_6k.dbo" - schema <- 
"cdm_truven_ccae_6k.dbo" - cdmVersion <- "4" - - sqlServerresultsDatabaseSchema <- "cdm_truven_ccae_6k_v5.dbo" - schema <- "cdm_truven_ccae_6k_v5.dbo" - cdmVersion <- "5" - - #Test on SQL Server - setwd("c:/temp") - connectionDetails <- DatabaseConnector::createConnectionDetails(dbms="sql server", server=sqlServerServer) - exportToJson(connectionDetails, cdmDatabaseSchema = schema, resultsDatabaseSchema = sqlServerresultsDatabaseSchema,outputPath = "c:/temp/SqlServer",cdmVersion=cdmVersion) - - #Test on PostgreSQL - - sqlServerresultsDatabaseSchema <- "cdm_truven_ccae_6k" - schema <- "cdm_truven_ccae_6k" - cdmVersion <- "4" - - sqlServerresultsDatabaseSchema <- "cdm_truven_ccae_6k_v5" - schema <- "cdm_truven_ccae_6k_v5" - cdmVersion <- "5" - - setwd("c:/temp") - connectionDetails <- DatabaseConnector::createConnectionDetails(dbms="postgresql", server="localhost/ohdsi", user="postgres",password=pw) - exportToJson(connectionDetails, cdmDatabaseSchema = schema, resultsDatabaseSchema = "scratch",outputPath = "c:/temp/PostgreSQL",cdmVersion=cdmVersion) - - #Test on Oracle - setwd("c:/temp") - connectionDetails <- DatabaseConnector::createConnectionDetails(dbms="oracle", server="xe", user="system",password="OHDSI") - exportToJson(connectionDetails, cdmDatabaseSchema = schema, resultsDatabaseSchema = "scratch",outputPath = "c:/temp/Oracle",cdmVersion=cdmVersion) - - #Compare JSON files: - loadTextFile <- function(fileName){ - readChar(fileName, file.info(fileName)$size) - } - - compareJSONFiles <- function(folder1,folder2){ - setwd(folder1) - count <- 0 - for (f in list.files(pattern="*.\\.json",full.names=FALSE, recursive=TRUE)){ - count = count + 1 - file1 <- loadTextFile(paste(folder1,"/",f,sep="")) - file2 <- loadTextFile(paste(folder2,"/",f,sep="")) - - file1 <- gsub("\"NA\"","\"\"",file1) - file2 <- gsub("\"NA\"","\"\"",file2) - if (nchar(file1) != nchar(file2)){ - writeLines(paste("Warning: size mismatch in",f)) - } - } - writeLines(paste("Finished comparing",count,"files")) - } - - compareJSONFiles("c:/temp/oracle","c:/temp/postgresql") - - compareJSONFiles("c:/temp/postgresql","c:/temp/sqlserver") - - connectionDetails <- DatabaseConnector::createConnectionDetails(dbms="oracle", server="xe", user="system", schema="scratch",password=pw) - conn <- DatabaseConnector::connect(connectionDetails) - analysesDetails <- dbGetQuery(conn,"SELECT * FROM ACHILLES_ANALYSiS") - save(analysesDetails,"c:/temp/analysesDetails.rda") - DatabaseConnector::dbDisconnect(conn) -} - - diff --git a/R/addDatasource.R b/R/addDatasource.R old mode 100644 new mode 100755 index 1568da2b..c71dea72 --- a/R/addDatasource.R +++ b/R/addDatasource.R @@ -1,6 +1,6 @@ # @file exportToJson # -# Copyright 2014 Observational Health Data Sciences and Informatics +# Copyright 2018 Observational Health Data Sciences and Informatics # # This file is part of Achilles # @@ -28,38 +28,42 @@ #' If the datasources file exists, the data source will be added to the file. #' If the datasources file does not exist, a new file wil be initialized with the specified data source. #' -#' @param jsonFolderPath Path of the Json files generated by \code{exportToJson}. -#' @param dataName Name of Achilles report. Default is the base folder of \code{jsonFolderPath}. -#' @param datasourcePath Path where datasource file will be saved. Default is one folder above the \code{jsonFolderPath} -#' @param datasourcesFilename Name of the file where the datasource is located or stored. Default is "datasources.json". 
-#' @param additionalParam A R list specifying which additinal parameters to write to the datasource object. Default is \code{list(cdmVersion=5)}. +#' @param jsonFolderPath Path of the Json files generated by \code{exportToJson}. +#' @param dataName Name of Achilles report. Default is the base folder of \code{jsonFolderPath}. +#' @param datasourcePath Path where datasource file will be saved. Default is one folder above the \code{jsonFolderPath} +#' @param datasourcesFilename Name of the file where the datasource is located or stored. Default is "datasources.json". +#' @param additionalParam A R list specifying which additinal parameters to write to the datasource object. Default is \code{list(cdmVersion=5)}. #' #' @return none #' -#' @examples \dontrun{ -#' jsonFolderPath <- "your/output/path" -#' connectionDetails <- DatabaseConnector::createConnectionDetails(dbms="sql server", server="yourserver") -#' exportToJson(connectionDetails, cdmDatabaseSchema="cdm5", outputPath=jsonFolderPath) -#' addDatasource(jsonFolderPath, "your_data_name") -#' } +#' @examples \dontrun{ +#' jsonFolderPath <- "your/output/path" +#' connectionDetails <- DatabaseConnector::createConnectionDetails(dbms="sql server", server="yourserver") +#' exportToJson(connectionDetails, cdmDatabaseSchema="cdm5", outputPath=jsonFolderPath) +#' addDatasource(jsonFolderPath, "your_data_name") +#' } #' @export -addDatasource <- function(jsonFolderPath, dataName = NULL, datasourcePath = NULL, datasourcesFilename = "datasources.json", additionalParam = list(cdmVersion = 5) ) +addDatasource <- function(jsonFolderPath, + dataName = NULL, + datasourcePath = NULL, + datasourcesFilename = "datasources.json", + additionalParam = list(cdmVersion = 5) ) { # Parse the folder path. folderName <- basename(jsonFolderPath) - if( is.null(dataName) ){ + if (is.null(dataName)) { dataName <- folderName } # Path to data source file - if( is.null(datasourcePath) ){ - datasourcePath <- file.path( dirname(jsonFolderPath), datasourcesFilename ) + if (is.null(datasourcePath)) { + datasourcePath <- file.path( dirname(jsonFolderPath), datasourcesFilename) } # Read the json file or create new if not exists - if( file.exists(datasourcePath) ){ + if (file.exists(datasourcePath)) { print(paste("Writing to existing datasources file: ", datasourcePath)) - j <- rjson::fromJSON( file = datasourcePath ) + j <- rjson::fromJSON(file = datasourcePath) } else { print(paste("Creating a new datasources file: ", datasourcePath)) j <- rjson::fromJSON('{"datasources":[]}') diff --git a/R/exportToJson.R b/R/exportToJson.R old mode 100644 new mode 100755 index 7869a369..3d7148eb --- a/R/exportToJson.R +++ b/R/exportToJson.R @@ -1,2227 +1,2239 @@ -# @file exportToJson -# -# Copyright 2014 Observational Health Data Sciences and Informatics -# -# This file is part of Achilles -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# -# @author Observational Health Data Sciences and Informatics -# @author Chris Knoll -# @author Frank DeFalco - - -# Run this definition of allReports when adding a new report -# allReports <- c("CONDITION", -# "CONDITION_ERA", -# "DASHBOARD", -# "DATA_DENSITY", -# "DEATH", -# "DRUG", -# "DRUG_ERA", -# "HEEL", -# "OBSERVATION", -# "OBSERVATION_PERIOD", -# "PERSON", -# "PROCEDURE", -# "VISIT", -# "MEASUREMENT", -# "META") -# save(allReports,file="data/allReports.rda") - -initOutputPath <- function (outputPath){ - # create output path if it doesn't already exist, warn if it does - if (file.exists(outputPath)){ - writeLines(paste("Warning: folder",outputPath,"already exists")) - } else { - dir.create(paste(outputPath,"/",sep="")) - } -} - -#' @title showReportTypes -#' -#' @description -#' \code{showReportTypes} Displays the Report Types that can be passed as vector values to exportToJson. -#' -#' @details -#' exportToJson supports the following report types: -#' "CONDITION","CONDITION_ERA", "DASHBOARD", "DATA_DENSITY", "DEATH", "DRUG", "DRUG_ERA", "HEEL", "META", "OBSERVATION", "OBSERVATION_PERIOD", "PERSON", "PROCEDURE","VISIT" -#' -#' @return none (opens the allReports vector in a View() display) -#' @examples \dontrun{ -#' showReportTypes() -#' } -#' @export -showReportTypes <- function() -{ - utils::View(allReports) -} - -#' @title exportToJson -#' -#' @description -#' \code{exportToJson} Exports Achilles statistics into a JSON form for reports. -#' -#' @details -#' Creates individual files for each report found in Achilles.Web -#' -#' -#' @param connectionDetails An R object of type ConnectionDetail (details for the function that contains server info, database type, optionally username/password, port) -#' @param cdmDatabaseSchema Name of the database schema that contains the OMOP CDM. -#' @param resultsDatabaseSchema Name of the database schema that contains the Achilles analysis files. Default is cdmDatabaseSchema -#' @param outputPath A folder location to save the JSON files. Default is current working folder -#' @param reports A character vector listing the set of reports to generate. Default is all reports. -#' @param cdmVersion Define the OMOP CDM version used: currently support "4" and "5". Default = "4" -#' @param vocabDatabaseSchema string name of database schema that contains OMOP Vocabulary. Default is cdmDatabaseSchema. On SQL Server, this should specifiy both the database and the schema, so for example 'results.dbo'. 
-#' See \code{data(allReports)} for a list of all report types -#' -#' @return none -#' @examples \dontrun{ -#' connectionDetails <- DatabaseConnector::createConnectionDetails(dbms="sql server", server="yourserver") -#' exportToJson(connectionDetails, cdmDatabaseSchema="cdm4_sim", outputPath="your/output/path") -#' } -#' @export -exportToJson <- function (connectionDetails, cdmDatabaseSchema, resultsDatabaseSchema, outputPath = getwd(), reports = allReports, cdmVersion = "4", vocabDatabaseSchema = cdmDatabaseSchema) -{ - start <- Sys.time() - if (missing(resultsDatabaseSchema)) - resultsDatabaseSchema <- cdmDatabaseSchema - - initOutputPath(outputPath) - - # connect to the results schema - connectionDetails$schema = resultsDatabaseSchema - conn <- DatabaseConnector::connect(connectionDetails) - - # generate reports - - if ("CONDITION" %in% reports) - { - generateConditionTreemap(conn, connectionDetails$dbms, cdmDatabaseSchema, resultsDatabaseSchema, outputPath, cdmVersion, vocabDatabaseSchema) - generateConditionReports(conn, connectionDetails$dbms, cdmDatabaseSchema, resultsDatabaseSchema, outputPath, cdmVersion, vocabDatabaseSchema) - } - - if ("CONDITION_ERA" %in% reports) - { - generateConditionEraTreemap(conn, connectionDetails$dbms, cdmDatabaseSchema, resultsDatabaseSchema, outputPath, cdmVersion, vocabDatabaseSchema) - generateConditionEraReports(conn, connectionDetails$dbms, cdmDatabaseSchema, resultsDatabaseSchema, outputPath, cdmVersion, vocabDatabaseSchema) - } - - if ("DATA_DENSITY" %in% reports) - generateDataDensityReport(conn, connectionDetails$dbms, cdmDatabaseSchema, resultsDatabaseSchema, outputPath, cdmVersion, vocabDatabaseSchema) - - if ("DEATH" %in% reports) - { - generateDeathReports(conn, connectionDetails$dbms, cdmDatabaseSchema, resultsDatabaseSchema, outputPath, cdmVersion, vocabDatabaseSchema) - } - - if ("DRUG_ERA" %in% reports) - { - generateDrugEraTreemap(conn,connectionDetails$dbms, cdmDatabaseSchema, resultsDatabaseSchema, outputPath, cdmVersion, vocabDatabaseSchema) - generateDrugEraReports(conn,connectionDetails$dbms, cdmDatabaseSchema, resultsDatabaseSchema, outputPath, cdmVersion, vocabDatabaseSchema) - } - - if ("DRUG" %in% reports) - { - generateDrugTreemap(conn, connectionDetails$dbms, cdmDatabaseSchema, resultsDatabaseSchema, outputPath, cdmVersion, vocabDatabaseSchema) - generateDrugReports(conn, connectionDetails$dbms, cdmDatabaseSchema, resultsDatabaseSchema, outputPath, cdmVersion, vocabDatabaseSchema) - } - - if ("HEEL" %in% reports) - { - generateAchillesHeelReport(conn, connectionDetails$dbms, cdmDatabaseSchema, resultsDatabaseSchema, outputPath, cdmVersion, vocabDatabaseSchema) - } - - if (("META" %in% reports) & (cdmVersion != "4")) - { - generateDomainMetaReport(conn, connectionDetails$dbms, cdmDatabaseSchema, resultsDatabaseSchema, outputPath, cdmVersion, vocabDatabaseSchema) - } - - if ( ("MEASUREMENT" %in% reports) & (cdmVersion != "4")) - { - generateMeasurementTreemap(conn, connectionDetails$dbms, cdmDatabaseSchema, resultsDatabaseSchema, outputPath, cdmVersion, vocabDatabaseSchema) - generateMeasurementReports(conn, connectionDetails$dbms, cdmDatabaseSchema, resultsDatabaseSchema, outputPath, cdmVersion, vocabDatabaseSchema) - } - - - if ("OBSERVATION" %in% reports) - { - generateObservationTreemap(conn, connectionDetails$dbms, cdmDatabaseSchema, resultsDatabaseSchema, outputPath, cdmVersion, vocabDatabaseSchema) - generateObservationReports(conn, connectionDetails$dbms, cdmDatabaseSchema, resultsDatabaseSchema, outputPath, 
cdmVersion, vocabDatabaseSchema) - } - - if ("OBSERVATION_PERIOD" %in% reports) - generateObservationPeriodReport(conn, connectionDetails$dbms, cdmDatabaseSchema, resultsDatabaseSchema, outputPath, cdmVersion, vocabDatabaseSchema) - - if ("PERSON" %in% reports) - generatePersonReport(conn, connectionDetails$dbms, cdmDatabaseSchema, resultsDatabaseSchema, outputPath, cdmVersion, vocabDatabaseSchema) - - if ("PROCEDURE" %in% reports) - { - generateProcedureTreemap(conn, connectionDetails$dbms, cdmDatabaseSchema, resultsDatabaseSchema, outputPath, cdmVersion, vocabDatabaseSchema) - generateProcedureReports(conn, connectionDetails$dbms, cdmDatabaseSchema, resultsDatabaseSchema, outputPath, cdmVersion, vocabDatabaseSchema) - } - - if ("VISIT" %in% reports) - { - generateVisitTreemap(conn, connectionDetails$dbms, cdmDatabaseSchema, resultsDatabaseSchema, outputPath, cdmVersion, vocabDatabaseSchema) - generateVisitReports(conn, connectionDetails$dbms, cdmDatabaseSchema, resultsDatabaseSchema, outputPath, cdmVersion, vocabDatabaseSchema) - } - - # dashboard is always last - if ("DASHBOARD" %in% reports) - { - generateDashboardReport(outputPath) - } - - DatabaseConnector::disconnect(conn) - - delta <- Sys.time() - start - writeLines(paste("Export took", signif(delta,3), attr(delta,"units"))) - writeLines(paste("JSON files can now be found in",outputPath)) -} - -#' @title exportConditionToJson -#' -#' @description -#' \code{exportConditonToJson} Exports Achilles Condition report into a JSON form for reports. -#' -#' @details -#' Creates individual files for Condition report found in Achilles.Web -#' -#' -#' @param connectionDetails An R object of type ConnectionDetail (details for the function that contains server info, database type, optionally username/password, port) -#' @param cdmDatabaseSchema Name of the database schema that contains the vocabulary files -#' @param resultsDatabaseSchema Name of the database schema that contains the Achilles analysis files. Default is cdmDatabaseSchema -#' @param outputPath A folder location to save the JSON files. Default is current working folder -#' @param cdmVersion Define the OMOP CDM version used: currently support "4" and "5". Default = "4" -#' @param vocabDatabaseSchema string name of database schema that contains OMOP Vocabulary. Default is cdmDatabaseSchema. On SQL Server, this should specifiy both the database and the schema, so for example 'results.dbo'. -#' -#' @return none -#' @examples \dontrun{ -#' connectionDetails <- DatabaseConnector::createConnectionDetails(dbms="sql server", server="yourserver") -#' exportConditionToJson(connectionDetails, cdmDatabaseSchema="cdm4_sim", outputPath="your/output/path") -#' } -#' @export -exportConditionToJson <- function (connectionDetails, cdmDatabaseSchema, resultsDatabaseSchema, outputPath = getwd(), cdmVersion="4", vocabDatabaseSchema = cdmDatabaseSchema) -{ - exportToJson(connectionDetails, cdmDatabaseSchema, resultsDatabaseSchema, outputPath, reports = c("CONDITION"), cdmVersion, vocabDatabaseSchema) -} - -#' @title exportConditionEraToJson -#' -#' @description -#' \code{exportConditionEraToJson} Exports Achilles Condition Era report into a JSON form for reports. 
-#' -#' @details -#' Creates individual files for Condition Era report found in Achilles.Web -#' -#' -#' @param connectionDetails An R object of type ConnectionDetail (details for the function that contains server info, database type, optionally username/password, port) -#' @param cdmDatabaseSchema Name of the database schema that contains the vocabulary files -#' @param resultsDatabaseSchema Name of the database schema that contains the Achilles analysis files. Default is cdmDatabaseSchema -#' @param outputPath A folder location to save the JSON files. Default is current working folder -#' @param cdmVersion Define the OMOP CDM version used: currently support "4" and "5". Default = "4" -#' @param vocabDatabaseSchema string name of database schema that contains OMOP Vocabulary. Default is cdmDatabaseSchema. On SQL Server, this should specifiy both the database and the schema, so for example 'results.dbo'. -#' -#' @return none -#' @examples \dontrun{ -#' connectionDetails <- DatabaseConnector::createConnectionDetails(dbms="sql server", server="yourserver") -#' exportConditionEraToJson(connectionDetails, cdmDatabaseSchema="cdm4_sim", outputPath="your/output/path") -#' } -#' @export -exportConditionEraToJson <- function (connectionDetails, cdmDatabaseSchema, resultsDatabaseSchema, outputPath = getwd(), cdmVersion="4", vocabDatabaseSchema = cdmDatabaseSchema) -{ - exportToJson(connectionDetails, cdmDatabaseSchema, resultsDatabaseSchema, outputPath, reports = c("CONDITION_ERA"), cdmVersion, vocabDatabaseSchema) -} - -#' @title exportDashboardToJson -#' -#' @description -#' \code{exportDashboardToJson} Exports Achilles Dashboard report into a JSON form for reports. -#' -#' @details -#' Creates individual files for Dashboard report found in Achilles.Web. NOTE: This function reads the results -#' from the other exports and aggregates them into a single file. If other reports are not genreated, this function will fail. -#' -#' -#' @param connectionDetails An R object of type ConnectionDetail (details for the function that contains server info, database type, optionally username/password, port) -#' @param cdmDatabaseSchema Name of the database schema that contains the vocabulary files -#' @param resultsDatabaseSchema Name of the database schema that contains the Achilles analysis files. Default is cdmDatabaseSchema -#' @param outputPath A folder location to save the JSON files. Default is current working folder -#' @param cdmVersion Define the OMOP CDM version used: currently support "4" and "5". Default = "4" -#' @param vocabDatabaseSchema string name of database schema that contains OMOP Vocabulary. Default is cdmDatabaseSchema. On SQL Server, this should specifiy both the database and the schema, so for example 'results.dbo'. -#' -#' @return none -#' @examples \dontrun{ -#' connectionDetails <- DatabaseConnector::createConnectionDetails(dbms="sql server", server="yourserver") -#' exportDashboardToJson(connectionDetails, cdmDatabaseSchema="cdm4_sim", outputPath="your/output/path") -#' } -#' @export -exportDashboardToJson <- function (connectionDetails, cdmDatabaseSchema, resultsDatabaseSchema, outputPath = getwd(), cdmVersion="4", vocabDatabaseSchema = cdmDatabaseSchema) -{ - exportToJson(connectionDetails, cdmDatabaseSchema, resultsDatabaseSchema, outputPath, reports = c("DASHBOARD"), cdmVersion, vocabDatabaseSchema) -} - -#' @title exportDataDensityToJson -#' -#' @description -#' \code{exportDataDensityToJson} Exports Achilles Data Density report into a JSON form for reports. 
-#' -#' @details -#' Creates individual files for Data Density report found in Achilles.Web -#' -#' -#' @param connectionDetails An R object of type ConnectionDetail (details for the function that contains server info, database type, optionally username/password, port) -#' @param cdmDatabaseSchema Name of the database schema that contains the vocabulary files -#' @param resultsDatabaseSchema Name of the database schema that contains the Achilles analysis files. Default is cdmDatabaseSchema -#' @param outputPath A folder location to save the JSON files. Default is current working folder -#' @param cdmVersion Define the OMOP CDM version used: currently support "4" and "5". Default = "4" -#' @param vocabDatabaseSchema string name of database schema that contains OMOP Vocabulary. Default is cdmDatabaseSchema. On SQL Server, this should specifiy both the database and the schema, so for example 'results.dbo'. -#' -#' @return none -#' @examples \dontrun{ -#' connectionDetails <- DatabaseConnector::createConnectionDetails(dbms="sql server", server="yourserver") -#' exportDataDensityToJson(connectionDetails, cdmDatabaseSchema="cdm4_sim", outputPath="your/output/path") -#' } -#' @export -exportDataDensityToJson <- function (connectionDetails, cdmDatabaseSchema, resultsDatabaseSchema, outputPath = getwd(), cdmVersion="4", vocabDatabaseSchema = cdmDatabaseSchema) -{ - exportToJson(connectionDetails, cdmDatabaseSchema, resultsDatabaseSchema, outputPath, reports = c("DATA_DENSITY"), cdmVersion, vocabDatabaseSchema) -} - -#' @title exportDeathToJson -#' -#' @description -#' \code{exportDeathToJson} Exports Achilles Death report into a JSON form for reports. -#' -#' @details -#' Creates individual files for Death report found in Achilles.Web -#' -#' -#' @param connectionDetails An R object of type ConnectionDetail (details for the function that contains server info, database type, optionally username/password, port) -#' @param cdmDatabaseSchema Name of the database schema that contains the vocabulary files -#' @param resultsDatabaseSchema Name of the database schema that contains the Achilles analysis files. Default is cdmDatabaseSchema -#' @param outputPath A folder location to save the JSON files. Default is current working folder -#' @param cdmVersion Define the OMOP CDM version used: currently support "4" and "5". Default = "4" -#' @param vocabDatabaseSchema string name of database schema that contains OMOP Vocabulary. Default is cdmDatabaseSchema. On SQL Server, this should specifiy both the database and the schema, so for example 'results.dbo'. -#' -#' @return none -#' @examples \dontrun{ -#' connectionDetails <- DatabaseConnector::createConnectionDetails(dbms="sql server", server="yourserver") -#' exportDeathToJson(connectionDetails, cdmDatabaseSchema="cdm4_sim", outputPath="your/output/path") -#' } -#' @export -exportDeathToJson <- function (connectionDetails, cdmDatabaseSchema, resultsDatabaseSchema, outputPath = getwd(), cdmVersion="4", vocabDatabaseSchema = cdmDatabaseSchema) -{ - exportToJson(connectionDetails, cdmDatabaseSchema, resultsDatabaseSchema, outputPath, reports = c("DEATH"), cdmVersion, vocabDatabaseSchema) -} - -#' @title exportDrugToJson -#' -#' @description -#' \code{exportDrugToJson} Exports Achilles Drug report into a JSON form for reports. 
-#' -#' @details -#' Creates individual files for Drug report found in Achilles.Web -#' -#' -#' @param connectionDetails An R object of type ConnectionDetail (details for the function that contains server info, database type, optionally username/password, port) -#' @param cdmDatabaseSchema Name of the database schema that contains the vocabulary files -#' @param resultsDatabaseSchema Name of the database schema that contains the Achilles analysis files. Default is cdmDatabaseSchema -#' @param outputPath A folder location to save the JSON files. Default is current working folder -#' @param cdmVersion Define the OMOP CDM version used: currently support "4" and "5". Default = "4" -#' @param vocabDatabaseSchema string name of database schema that contains OMOP Vocabulary. Default is cdmDatabaseSchema. On SQL Server, this should specifiy both the database and the schema, so for example 'results.dbo'. -#' -#' @return none -#' @examples \dontrun{ -#' connectionDetails <- DatabaseConnector::createConnectionDetails(dbms="sql server", server="yourserver") -#' exportDrugToJson(connectionDetails, cdmDatabaseSchema="cdm4_sim", outputPath="your/output/path") -#' } -#' @export -exportDrugToJson <- function (connectionDetails, cdmDatabaseSchema, resultsDatabaseSchema, outputPath = getwd(), cdmVersion="4", vocabDatabaseSchema = cdmDatabaseSchema) -{ - exportToJson(connectionDetails, cdmDatabaseSchema, resultsDatabaseSchema, outputPath, reports = c("DRUG"), cdmVersion, vocabDatabaseSchema) -} - -#' @title exportDrugEraToJson -#' -#' @description -#' \code{exportDrugEraToJson} Exports Achilles Drug Era report into a JSON form for reports. -#' -#' @details -#' Creates individual files for Drug Era report found in Achilles.Web -#' -#' -#' @param connectionDetails An R object of type ConnectionDetail (details for the function that contains server info, database type, optionally username/password, port) -#' @param cdmDatabaseSchema Name of the database schema that contains the vocabulary files -#' @param resultsDatabaseSchema Name of the database schema that contains the Achilles analysis files. Default is cdmDatabaseSchema -#' @param outputPath A folder location to save the JSON files. Default is current working folder -#' @param cdmVersion Define the OMOP CDM version used: currently support "4" and "5". Default = "4" -#' @param vocabDatabaseSchema string name of database schema that contains OMOP Vocabulary. Default is cdmDatabaseSchema. On SQL Server, this should specifiy both the database and the schema, so for example 'results.dbo'. -#' -#' @return none -#' @examples \dontrun{ -#' connectionDetails <- DatabaseConnector::createConnectionDetails(dbms="sql server", server="yourserver") -#' exportDrugEraToJson(connectionDetails, cdmDatabaseSchema="cdm4_sim", outputPath="your/output/path") -#' } -#' @export -exportDrugEraToJson <- function (connectionDetails, cdmDatabaseSchema, resultsDatabaseSchema, outputPath = getwd(), cdmVersion="4", vocabDatabaseSchema = cdmDatabaseSchema) -{ - exportToJson(connectionDetails, cdmDatabaseSchema, resultsDatabaseSchema, outputPath, reports = c("DRUG_ERA"), cdmVersion, vocabDatabaseSchema) -} - -#' @title exportHeelToJson -#' -#' @description -#' \code{exportHeelToJson} Exports Achilles Heel report into a JSON form for reports. 
-#'
-#' @details
-#' Creates individual files for Achilles Heel report found in Achilles.Web
-#'
-#'
-#' @param connectionDetails An R object of type ConnectionDetail (details for the function that contains server info, database type, optionally username/password, port)
-#' @param cdmDatabaseSchema Name of the database schema that contains the vocabulary files
-#' @param resultsDatabaseSchema Name of the database schema that contains the Achilles analysis files. Default is cdmDatabaseSchema
-#' @param outputPath A folder location to save the JSON files. Default is current working folder
-#' @param cdmVersion Define the OMOP CDM version used: currently support "4" and "5". Default = "4"
-#' @param vocabDatabaseSchema string name of database schema that contains OMOP Vocabulary. Default is cdmDatabaseSchema. On SQL Server, this should specify both the database and the schema, so for example 'results.dbo'.
-#'
-#' @return none
-#' @examples \dontrun{
-#' connectionDetails <- DatabaseConnector::createConnectionDetails(dbms="sql server", server="yourserver")
-#' exportHeelToJson(connectionDetails, cdmDatabaseSchema="cdm4_sim", outputPath="your/output/path")
-#' }
-#' @export
-exportHeelToJson <- function (connectionDetails, cdmDatabaseSchema, resultsDatabaseSchema, outputPath = getwd(), cdmVersion="4", vocabDatabaseSchema = cdmDatabaseSchema)
-{
-  exportToJson(connectionDetails, cdmDatabaseSchema, resultsDatabaseSchema, outputPath, reports = c("HEEL"), cdmVersion, vocabDatabaseSchema)
-}
-
-#' @title exportMetaToJson
-#'
-#' @description
-#' \code{exportMetaToJson} Exports the Achilles Meta (domain meta) report into a JSON form for reports.
-#'
-#' @details
-#' Creates individual files for the domain meta report found in Achilles.Web
-#'
-#'
-#' @param connectionDetails An R object of type ConnectionDetail (details for the function that contains server info, database type, optionally username/password, port)
-#' @param cdmDatabaseSchema Name of the database schema that contains the vocabulary files
-#' @param resultsDatabaseSchema Name of the database schema that contains the Achilles analysis files. Default is cdmDatabaseSchema
-#' @param outputPath A folder location to save the JSON files. Default is current working folder
-#' @param cdmVersion Define the OMOP CDM version used: currently support "4" and "5". Default = "4"
-#' @param vocabDatabaseSchema string name of database schema that contains OMOP Vocabulary. Default is cdmDatabaseSchema. On SQL Server, this should specify both the database and the schema, so for example 'results.dbo'.
-#'
-#' @return none
-#' @examples \dontrun{
-#' connectionDetails <- DatabaseConnector::createConnectionDetails(dbms="sql server", server="yourserver")
-#' exportMetaToJson(connectionDetails, cdmDatabaseSchema="cdm4_sim", outputPath="your/output/path")
-#' }
-#' @export
-exportMetaToJson <- function (connectionDetails, cdmDatabaseSchema, resultsDatabaseSchema, outputPath = getwd(), cdmVersion="4", vocabDatabaseSchema = cdmDatabaseSchema)
-{
-  exportToJson(connectionDetails, cdmDatabaseSchema, resultsDatabaseSchema, outputPath, reports = c("META"), cdmVersion, vocabDatabaseSchema)
-}
-
-#' @title exportMeasurementToJson
-#'
-#' @description
-#' \code{exportMeasurementToJson} Exports Achilles Measurement report into a JSON form for reports.
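# Illustrative sketch (not part of the original file): exportHeelToJson() writes
# <outputPath>/achillesheel.json containing a single MESSAGES element (see
# generateAchillesHeelReport() further below), which can then be inspected in R. Schema
# names and paths are placeholders, and the Heel analyses are assumed to have been run
# already so that the results schema is populated.
connectionDetails <- DatabaseConnector::createConnectionDetails(dbms = "sql server",
                                                                server = "yourserver")
exportHeelToJson(connectionDetails,
                 cdmDatabaseSchema = "cdm4_sim",
                 resultsDatabaseSchema = "cdm4_sim",
                 outputPath = "your/output/path")
heel <- rjson::fromJSON(file = file.path("your/output/path", "achillesheel.json"))
str(heel$MESSAGES)  # Heel messages as exported for Achilles.Web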
-#' -#' @details -#' Creates individual files for Measurement report found in Achilles.Web -#' -#' -#' @param connectionDetails An R object of type ConnectionDetail (details for the function that contains server info, database type, optionally username/password, port) -#' @param cdmDatabaseSchema Name of the database schema that contains the vocabulary files -#' @param resultsDatabaseSchema Name of the database schema that contains the Achilles analysis files. Default is cdmDatabaseSchema -#' @param outputPath A folder location to save the JSON files. Default is current working folder -#' @param cdmVersion Define the OMOP CDM version used: currently support "4" and "5". Default = "4" -#' @param vocabDatabaseSchema string name of database schema that contains OMOP Vocabulary. Default is cdmDatabaseSchema. On SQL Server, this should specifiy both the database and the schema, so for example 'results.dbo'. -#' -#' @return none -#' @examples \dontrun{ -#' connectionDetails <- DatabaseConnector::createConnectionDetails(dbms="sql server", server="yourserver") -#' exportMeasurementToJson(connectionDetails, cdmDatabaseSchema="cdm4_sim", outputPath="your/output/path") -#' } -#' @export -exportMeasurementToJson <- function (connectionDetails, cdmDatabaseSchema, resultsDatabaseSchema, outputPath = getwd(), cdmVersion="4", vocabDatabaseSchema = cdmDatabaseSchema) -{ - exportToJson(connectionDetails, cdmDatabaseSchema, resultsDatabaseSchema, outputPath, reports = c("MEASUREMENT"), cdmVersion, vocabDatabaseSchema) -} - -#' @title exportObservationToJson -#' -#' @description -#' \code{exportObservationToJson} Exports Achilles Observation report into a JSON form for reports. -#' -#' @details -#' Creates individual files for Observation report found in Achilles.Web -#' -#' -#' @param connectionDetails An R object of type ConnectionDetail (details for the function that contains server info, database type, optionally username/password, port) -#' @param cdmDatabaseSchema Name of the database schema that contains the vocabulary files -#' @param resultsDatabaseSchema Name of the database schema that contains the Achilles analysis files. Default is cdmDatabaseSchema -#' @param outputPath A folder location to save the JSON files. Default is current working folder -#' @param cdmVersion Define the OMOP CDM version used: currently support "4" and "5". Default = "4" -#' @param vocabDatabaseSchema string name of database schema that contains OMOP Vocabulary. Default is cdmDatabaseSchema. On SQL Server, this should specifiy both the database and the schema, so for example 'results.dbo'. -#' -#' @return none -#' @examples \dontrun{ -#' connectionDetails <- DatabaseConnector::createConnectionDetails(dbms="sql server", server="yourserver") -#' exportObservationToJson(connectionDetails, cdmDatabaseSchema="cdm4_sim", outputPath="your/output/path") -#' } -#' @export -exportObservationToJson <- function (connectionDetails, cdmDatabaseSchema, resultsDatabaseSchema, outputPath = getwd(), cdmVersion="4", vocabDatabaseSchema = cdmDatabaseSchema) -{ - exportToJson(connectionDetails, cdmDatabaseSchema, resultsDatabaseSchema, outputPath, reports = c("OBSERVATION"), cdmVersion, vocabDatabaseSchema) -} - -#' @title exportObservationPeriodToJson -#' -#' @description -#' \code{exportObservationPeriodToJson} Exports Achilles Observation Period report into a JSON form for reports. 
-#' -#' @details -#' Creates individual files for Observation Period report found in Achilles.Web -#' -#' -#' @param connectionDetails An R object of type ConnectionDetail (details for the function that contains server info, database type, optionally username/password, port) -#' @param cdmDatabaseSchema Name of the database schema that contains the vocabulary files -#' @param resultsDatabaseSchema Name of the database schema that contains the Achilles analysis files. Default is cdmDatabaseSchema -#' @param outputPath A folder location to save the JSON files. Default is current working folder -#' @param cdmVersion Define the OMOP CDM version used: currently support "4" and "5". Default = "4" -#' @param vocabDatabaseSchema string name of database schema that contains OMOP Vocabulary. Default is cdmDatabaseSchema. On SQL Server, this should specifiy both the database and the schema, so for example 'results.dbo'. -#' -#' @return none -#' @examples \dontrun{ -#' connectionDetails <- DatabaseConnector::createConnectionDetails(dbms="sql server", server="yourserver") -#' exportObservationPeriodToJson(connectionDetails, cdmDatabaseSchema="cdm4_sim", outputPath="your/output/path") -#' } -#' @export -exportObservationPeriodToJson <- function (connectionDetails, cdmDatabaseSchema, resultsDatabaseSchema, outputPath = getwd(), cdmVersion="4", vocabDatabaseSchema = cdmDatabaseSchema) -{ - exportToJson(connectionDetails, cdmDatabaseSchema, resultsDatabaseSchema, outputPath, reports = c("OBSERVATION_PERIOD"), cdmVersion, vocabDatabaseSchema) -} - -#' @title exportPersonToJson -#' -#' @description -#' \code{exportPersonToJson} Exports Achilles Person report into a JSON form for reports. -#' -#' @details -#' Creates individual files for Person report found in Achilles.Web -#' -#' -#' @param connectionDetails An R object of type ConnectionDetail (details for the function that contains server info, database type, optionally username/password, port) -#' @param cdmDatabaseSchema Name of the database schema that contains the vocabulary files -#' @param resultsDatabaseSchema Name of the database schema that contains the Achilles analysis files. Default is cdmDatabaseSchema -#' @param outputPath A folder location to save the JSON files. Default is current working folder -#' @param cdmVersion Define the OMOP CDM version used: currently support "4" and "5". Default = "4" -#' @param vocabDatabaseSchema string name of database schema that contains OMOP Vocabulary. Default is cdmDatabaseSchema. On SQL Server, this should specifiy both the database and the schema, so for example 'results.dbo'. -#' -#' @return none -#' @examples \dontrun{ -#' connectionDetails <- DatabaseConnector::createConnectionDetails(dbms="sql server", server="yourserver") -#' exportPersonToJson(connectionDetails, cdmDatabaseSchema="cdm4_sim", outputPath="your/output/path") -#' } -#' @export -exportPersonToJson <- function (connectionDetails, cdmDatabaseSchema, resultsDatabaseSchema, outputPath = getwd(), cdmVersion="4", vocabDatabaseSchema = cdmDatabaseSchema) -{ - exportToJson(connectionDetails, cdmDatabaseSchema, resultsDatabaseSchema, outputPath, reports = c("PERSON"), cdmVersion, vocabDatabaseSchema) -} - -#' @title exportProcedureToJson -#' -#' @description -#' \code{exportProcedureToJson} Exports Achilles Procedure report into a JSON form for reports. 
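# Illustrative sketch (not part of the original file): exportPersonToJson() writes
# <outputPath>/person.json, whose top-level elements (SUMMARY, GENDER_DATA, RACE_DATA,
# ETHNICITY_DATA, BIRTH_YEAR_HISTOGRAM) are assembled by generatePersonReport() further
# below and later re-read by the DASHBOARD export. connectionDetails is assumed to be the
# object created in the sketches above; schema names and paths are placeholders.
exportPersonToJson(connectionDetails,
                   cdmDatabaseSchema = "cdm4_sim",
                   resultsDatabaseSchema = "cdm4_sim",
                   outputPath = "your/output/path")
person <- rjson::fromJSON(file = file.path("your/output/path", "person.json"))
names(person)
person$SUMMARY  # CDM source name and number of persons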
-#' -#' @details -#' Creates individual files for Procedure report found in Achilles.Web -#' -#' -#' @param connectionDetails An R object of type ConnectionDetail (details for the function that contains server info, database type, optionally username/password, port) -#' @param cdmDatabaseSchema Name of the database schema that contains the vocabulary files -#' @param resultsDatabaseSchema Name of the database schema that contains the Achilles analysis files. Default is cdmDatabaseSchema -#' @param outputPath A folder location to save the JSON files. Default is current working folder -#' @param cdmVersion Define the OMOP CDM version used: currently support "4" and "5". Default = "4" -#' @param vocabDatabaseSchema string name of database schema that contains OMOP Vocabulary. Default is cdmDatabaseSchema. On SQL Server, this should specifiy both the database and the schema, so for example 'results.dbo'. -#' -#' @return none -#' @examples \dontrun{ -#' connectionDetails <- DatabaseConnector::createConnectionDetails(dbms="sql server", server="yourserver") -#' exportProcedureToJson(connectionDetails, cdmDatabaseSchema="cdm4_sim", outputPath="your/output/path") -#' } -#' @export -exportProcedureToJson <- function (connectionDetails, cdmDatabaseSchema, resultsDatabaseSchema, outputPath = getwd(), cdmVersion="4", vocabDatabaseSchema = cdmDatabaseSchema) -{ - exportToJson(connectionDetails, cdmDatabaseSchema, resultsDatabaseSchema, outputPath, reports = c("PROCEDURE"), cdmVersion, vocabDatabaseSchema) -} - -#' @title exportVisitToJson -#' -#' @description -#' \code{exportVisitToJson} Exports Achilles Visit report into a JSON form for reports. -#' -#' @details -#' Creates individual files for Visit report found in Achilles.Web -#' -#' -#' @param connectionDetails An R object of type ConnectionDetail (details for the function that contains server info, database type, optionally username/password, port) -#' @param cdmDatabaseSchema Name of the database schema that contains the vocabulary files -#' @param resultsDatabaseSchema Name of the database schema that contains the Achilles analysis files. Default is cdmDatabaseSchema -#' @param outputPath A folder location to save the JSON files. Default is current working folder -#' @param cdmVersion Define the OMOP CDM version used: currently support "4" and "5". Default = "4" -#' @param vocabDatabaseSchema string name of database schema that contains OMOP Vocabulary. Default is cdmDatabaseSchema. On SQL Server, this should specifiy both the database and the schema, so for example 'results.dbo'. 
-#' -#' @return none -#' @examples \dontrun{ -#' connectionDetails <- DatabaseConnector::createConnectionDetails(dbms="sql server", server="yourserver") -#' exportVisitToJson(connectionDetails, cdmDatabaseSchema="cdm4_sim", outputPath="your/output/path") -#' } -#' @export -exportVisitToJson <- function (connectionDetails, cdmDatabaseSchema, resultsDatabaseSchema, outputPath = getwd(), cdmVersion="4", vocabDatabaseSchema = cdmDatabaseSchema) -{ - exportToJson(connectionDetails, cdmDatabaseSchema, resultsDatabaseSchema, outputPath, reports = c("VISIT"), cdmVersion, vocabDatabaseSchema) -} - -addCdmVersionPath <- function(sqlFilename,cdmVersion){ - if (cdmVersion == "4") { - sqlFolder <- "export_v4" - } else if (cdmVersion == "5") { - sqlFolder <- "export_v5" - } else { - stop("Error: Invalid CDM Version number, use 4 or 5") - } - paste(sqlFolder,sqlFilename,sep="") -} - -generateAchillesHeelReport <- function(conn, dbms, cdmDatabaseSchema, resultsDatabaseSchema, outputPath, cdmVersion = "4", vocabDatabaseSchema = cdmDatabaseSchema) { - writeLines("Generating achilles heel report") - output <- {} - - queryAchillesHeel <- SqlRender::loadRenderTranslateSql(sqlFilename = addCdmVersionPath("/achillesheel/sqlAchillesHeel.sql",cdmVersion), - packageName = "Achilles", - dbms = dbms, - cdm_database_schema = cdmDatabaseSchema, - results_database_schema = resultsDatabaseSchema, - vocab_database_schema = vocabDatabaseSchema - ) - - output$MESSAGES <- DatabaseConnector::querySql(conn,queryAchillesHeel) - jsonOutput = rjson::toJSON(output) - write(jsonOutput, file=paste(outputPath, "/achillesheel.json", sep="")) -} - -generateDomainMetaReport <- function(conn, dbms, cdmDatabaseSchema, resultsDatabaseSchema, outputPath, cdmVersion = "4", vocabDatabaseSchema = cdmDatabaseSchema) { - writeLines("Generating domain meta report") - output <- {} - - queryDomainMeta <- SqlRender::loadRenderTranslateSql(sqlFilename = addCdmVersionPath("/domainmeta/sqlDomainMeta.sql",cdmVersion), - packageName = "Achilles", - dbms = dbms, - cdm_database_schema = cdmDatabaseSchema - ) - - if ("CDM_DOMAIN_META" %in% DatabaseConnector::getTableNames(connection = conn, databaseSchema = cdmDatabaseSchema)) - { - output$MESSAGES <- DatabaseConnector::querySql(conn, queryDomainMeta) - jsonOutput = rjson::toJSON(output) - write(jsonOutput, file=paste(outputPath, "/domainmeta.json", sep="")) - } - else - { - writeLines("No CDM_DOMAIN_META table found, skipping export") - } -} - -generateDrugEraTreemap <- function(conn, dbms,cdmDatabaseSchema, resultsDatabaseSchema, outputPath, cdmVersion = "4", vocabDatabaseSchema = cdmDatabaseSchema) { - writeLines("Generating drug era treemap") - progressBar <- utils::txtProgressBar(max=1,style=3) - progress = 0 - -queryDrugEraTreemap <- SqlRender::loadRenderTranslateSql(sqlFilename = addCdmVersionPath("/drugera/sqlDrugEraTreemap.sql",cdmVersion), - packageName = "Achilles", - dbms = dbms, - cdm_database_schema = cdmDatabaseSchema, - results_database_schema = resultsDatabaseSchema, - vocab_database_schema = vocabDatabaseSchema - ) - - dataDrugEraTreemap <- DatabaseConnector::querySql(conn,queryDrugEraTreemap) - - write(rjson::toJSON(dataDrugEraTreemap,method="C"),paste(outputPath, "/drugera_treemap.json", sep='')) - progress = progress + 1 - utils::setTxtProgressBar(progressBar, progress) - - close(progressBar) -} - -generateDrugTreemap <- function(conn, dbms,cdmDatabaseSchema, resultsDatabaseSchema, outputPath, cdmVersion = "4", vocabDatabaseSchema = cdmDatabaseSchema) { - writeLines("Generating drug 
treemap") - progressBar <- utils::txtProgressBar(max=1,style=3) - progress = 0 - - queryDrugTreemap <- SqlRender::loadRenderTranslateSql(sqlFilename = addCdmVersionPath("/drug/sqlDrugTreemap.sql",cdmVersion), - packageName = "Achilles", - dbms = dbms, - cdm_database_schema = cdmDatabaseSchema, - results_database_schema = resultsDatabaseSchema, - vocab_database_schema = vocabDatabaseSchema - ) - - dataDrugTreemap <- DatabaseConnector::querySql(conn,queryDrugTreemap) - - write(rjson::toJSON(dataDrugTreemap,method="C"),paste(outputPath, "/drug_treemap.json", sep='')) - progress = progress + 1 - utils::setTxtProgressBar(progressBar, progress) - - close(progressBar) -} - -generateConditionTreemap <- function(conn, dbms, cdmDatabaseSchema, resultsDatabaseSchema, outputPath, cdmVersion = "4", vocabDatabaseSchema = cdmDatabaseSchema) { - writeLines("Generating condition treemap") - progressBar <- utils::txtProgressBar(max=1,style=3) - progress = 0 - - queryConditionTreemap <- SqlRender::loadRenderTranslateSql(sqlFilename = addCdmVersionPath("/condition/sqlConditionTreemap.sql",cdmVersion), - packageName = "Achilles", - dbms = dbms, - cdm_database_schema = cdmDatabaseSchema, - results_database_schema = resultsDatabaseSchema, - vocab_database_schema = vocabDatabaseSchema - ) - - dataConditionTreemap <- DatabaseConnector::querySql(conn,queryConditionTreemap) - - write(rjson::toJSON(dataConditionTreemap,method="C"),paste(outputPath, "/condition_treemap.json", sep='')) - progress = progress + 1 - utils::setTxtProgressBar(progressBar, progress) - - close(progressBar) -} - -generateConditionEraTreemap <- function(conn, dbms, cdmDatabaseSchema, resultsDatabaseSchema, outputPath, cdmVersion = "4", vocabDatabaseSchema = cdmDatabaseSchema) { - writeLines("Generating condition era treemap") - progressBar <- utils::txtProgressBar(max=1,style=3) - progress = 0 - - queryConditionEraTreemap <- SqlRender::loadRenderTranslateSql(sqlFilename = addCdmVersionPath("/conditionera/sqlConditionEraTreemap.sql",cdmVersion), - packageName = "Achilles", - dbms = dbms, - cdm_database_schema = cdmDatabaseSchema, - results_database_schema = resultsDatabaseSchema, - vocab_database_schema = vocabDatabaseSchema - ) - - dataConditionEraTreemap <- DatabaseConnector::querySql(conn,queryConditionEraTreemap) - - write(rjson::toJSON(dataConditionEraTreemap,method="C"),paste(outputPath, "/conditionera_treemap.json", sep='')) - progress = progress + 1 - utils::setTxtProgressBar(progressBar, progress) - - close(progressBar) -} - -generateConditionReports <- function(conn, dbms, cdmDatabaseSchema, resultsDatabaseSchema, outputPath, cdmVersion = "4", vocabDatabaseSchema = cdmDatabaseSchema) { - writeLines("Generating condition reports") - - treemapFile <- file.path(outputPath,"condition_treemap.json") - if (!file.exists(treemapFile)){ - writeLines(paste("Warning: treemap file",treemapFile,"does not exist. 
Skipping detail report generation.")) - return() - } - - treemapData <- rjson::fromJSON(file = treemapFile) - uniqueConcepts <- unique(treemapData$CONCEPT_ID) - totalCount <- length(uniqueConcepts) - - - conditionsFolder <- file.path(outputPath,"conditions") - if (file.exists(conditionsFolder)){ - writeLines(paste("Warning: folder ",conditionsFolder," already exists")) - } else { - dir.create(paste(conditionsFolder,"/",sep="")) - - } - - progressBar <- utils::txtProgressBar(style=3) - progress = 0 - - queryPrevalenceByGenderAgeYear <- SqlRender::loadRenderTranslateSql(sqlFilename = addCdmVersionPath("/condition/sqlPrevalenceByGenderAgeYear.sql",cdmVersion), - packageName = "Achilles", - dbms = dbms, - cdm_database_schema = cdmDatabaseSchema, - results_database_schema = resultsDatabaseSchema, - vocab_database_schema = vocabDatabaseSchema - ) - - queryPrevalenceByMonth <- SqlRender::loadRenderTranslateSql(sqlFilename = addCdmVersionPath("/condition/sqlPrevalenceByMonth.sql",cdmVersion), - packageName = "Achilles", - dbms = dbms, - cdm_database_schema = cdmDatabaseSchema, - results_database_schema = resultsDatabaseSchema, - vocab_database_schema = vocabDatabaseSchema - ) - - queryConditionsByType <- SqlRender::loadRenderTranslateSql(sqlFilename = addCdmVersionPath("/condition/sqlConditionsByType.sql",cdmVersion), - packageName = "Achilles", - dbms = dbms, - cdm_database_schema = cdmDatabaseSchema, - results_database_schema = resultsDatabaseSchema, - vocab_database_schema = vocabDatabaseSchema - ) - - queryAgeAtFirstDiagnosis <- SqlRender::loadRenderTranslateSql(sqlFilename = addCdmVersionPath("/condition/sqlAgeAtFirstDiagnosis.sql",cdmVersion), - packageName = "Achilles", - dbms = dbms, - cdm_database_schema = cdmDatabaseSchema, - results_database_schema = resultsDatabaseSchema, - vocab_database_schema = vocabDatabaseSchema - ) - - dataPrevalenceByGenderAgeYear <- DatabaseConnector::querySql(conn,queryPrevalenceByGenderAgeYear) - dataPrevalenceByMonth <- DatabaseConnector::querySql(conn,queryPrevalenceByMonth) - dataConditionsByType <- DatabaseConnector::querySql(conn,queryConditionsByType) - dataAgeAtFirstDiagnosis <- DatabaseConnector::querySql(conn,queryAgeAtFirstDiagnosis) - - - buildConditionReport <- function(concept_id) { - report <- {} - report$PREVALENCE_BY_GENDER_AGE_YEAR <- dataPrevalenceByGenderAgeYear[dataPrevalenceByGenderAgeYear$CONCEPT_ID == concept_id,c(3,4,5,6)] - report$PREVALENCE_BY_MONTH <- dataPrevalenceByMonth[dataPrevalenceByMonth$CONCEPT_ID == concept_id,c(3,4)] - report$CONDITIONS_BY_TYPE <- dataConditionsByType[dataConditionsByType$CONDITION_CONCEPT_ID == concept_id,c(4,5)] - report$AGE_AT_FIRST_DIAGNOSIS <- dataAgeAtFirstDiagnosis[dataAgeAtFirstDiagnosis$CONCEPT_ID == concept_id,c(2,3,4,5,6,7,8,9)] - filename <- paste(outputPath, "/conditions/condition_" , concept_id , ".json", sep='') - - write(rjson::toJSON(report,method="C"),filename) - - #Update progressbar: - env <- parent.env(environment()) - curVal <- get("progress", envir = env) - assign("progress", curVal +1 ,envir= env) - utils::setTxtProgressBar(get("progressBar", envir= env), (curVal + 1) / get("totalCount", envir= env)) - } - - dummy <- lapply(uniqueConcepts, buildConditionReport) - - utils::setTxtProgressBar(progressBar, 1) - close(progressBar) -} - -generateConditionEraReports <- function(conn, dbms, cdmDatabaseSchema, resultsDatabaseSchema, outputPath, cdmVersion = "4", vocabDatabaseSchema = cdmDatabaseSchema) { - writeLines("Generating condition era reports") - - treemapFile <- 
file.path(outputPath,"conditionera_treemap.json") - if (!file.exists(treemapFile)){ - writeLines(paste("Warning: treemap file",treemapFile,"does not exist. Skipping detail report generation.")) - return() - } - - treemapData <- rjson::fromJSON(file = treemapFile) - uniqueConcepts <- unique(treemapData$CONCEPT_ID) - totalCount <- length(uniqueConcepts) - - conditionsFolder <- file.path(outputPath,"conditioneras") - if (file.exists(conditionsFolder)){ - writeLines(paste("Warning: folder ",conditionsFolder," already exists")) - } else { - dir.create(paste(conditionsFolder,"/",sep="")) - - } - - progressBar <- utils::txtProgressBar(style=3) - progress = 0 - - queryPrevalenceByGenderAgeYear <- SqlRender::loadRenderTranslateSql(sqlFilename = addCdmVersionPath("/conditionera/sqlPrevalenceByGenderAgeYear.sql",cdmVersion), - packageName = "Achilles", - dbms = dbms, - cdm_database_schema = cdmDatabaseSchema, - results_database_schema = resultsDatabaseSchema, - vocab_database_schema = vocabDatabaseSchema - ) - - queryPrevalenceByMonth <- SqlRender::loadRenderTranslateSql(sqlFilename = addCdmVersionPath("/conditionera/sqlPrevalenceByMonth.sql",cdmVersion), - packageName = "Achilles", - dbms = dbms, - cdm_database_schema = cdmDatabaseSchema, - results_database_schema = resultsDatabaseSchema, - vocab_database_schema = vocabDatabaseSchema - ) - - queryAgeAtFirstDiagnosis <- SqlRender::loadRenderTranslateSql(sqlFilename = addCdmVersionPath("/conditionera/sqlAgeAtFirstDiagnosis.sql",cdmVersion), - packageName = "Achilles", - dbms = dbms, - cdm_database_schema = cdmDatabaseSchema, - results_database_schema = resultsDatabaseSchema, - vocab_database_schema = vocabDatabaseSchema - ) - - queryLengthOfEra <- SqlRender::loadRenderTranslateSql(sqlFilename = addCdmVersionPath("/conditionera/sqlLengthOfEra.sql",cdmVersion), - packageName = "Achilles", - dbms = dbms, - cdm_database_schema = cdmDatabaseSchema, - results_database_schema = resultsDatabaseSchema, - vocab_database_schema = vocabDatabaseSchema - ) - - dataPrevalenceByGenderAgeYear <- DatabaseConnector::querySql(conn,queryPrevalenceByGenderAgeYear) - dataPrevalenceByMonth <- DatabaseConnector::querySql(conn,queryPrevalenceByMonth) - dataLengthOfEra <- DatabaseConnector::querySql(conn,queryLengthOfEra) - dataAgeAtFirstDiagnosis <- DatabaseConnector::querySql(conn,queryAgeAtFirstDiagnosis) - - - buildConditionEraReport <- function(concept_id) { - report <- {} - report$PREVALENCE_BY_GENDER_AGE_YEAR <- dataPrevalenceByGenderAgeYear[dataPrevalenceByGenderAgeYear$CONCEPT_ID == concept_id,c(2,3,4,5)] - report$PREVALENCE_BY_MONTH <- dataPrevalenceByMonth[dataPrevalenceByMonth$CONCEPT_ID == concept_id,c(2,3)] - report$LENGTH_OF_ERA <- dataLengthOfEra[dataLengthOfEra$CONCEPT_ID == concept_id,c(2,3,4,5,6,7,8,9)] - report$AGE_AT_FIRST_DIAGNOSIS <- dataAgeAtFirstDiagnosis[dataAgeAtFirstDiagnosis$CONCEPT_ID == concept_id,c(2,3,4,5,6,7,8,9)] - filename <- paste(outputPath, "/conditioneras/condition_" , concept_id , ".json", sep='') - - write(rjson::toJSON(report,method="C"),filename) - - #Update progressbar: - env <- parent.env(environment()) - curVal <- get("progress", envir = env) - assign("progress", curVal +1 ,envir= env) - utils::setTxtProgressBar(get("progressBar", envir= env), (curVal + 1) / get("totalCount", envir= env)) - } - - dummy <- lapply(uniqueConcepts, buildConditionEraReport) - - utils::setTxtProgressBar(progressBar, 1) - close(progressBar) -} - -generateDrugEraReports <- function(conn, dbms, cdmDatabaseSchema, resultsDatabaseSchema, outputPath, 
cdmVersion = "4", vocabDatabaseSchema = cdmDatabaseSchema) { - writeLines("Generating drug era reports") - - - treemapFile <- file.path(outputPath,"drugera_treemap.json") - if (!file.exists(treemapFile)){ - writeLines(paste("Warning: treemap file",treemapFile,"does not exist. Skipping detail report generation.")) - return() - } - - treemapData <- rjson::fromJSON(file = treemapFile) - uniqueConcepts <- unique(treemapData$CONCEPT_ID) - totalCount <- length(uniqueConcepts) - - - drugerasFolder <- file.path(outputPath,"drugeras") - if (file.exists(drugerasFolder)){ - writeLines(paste("Warning: folder ",drugerasFolder," already exists")) - } else { - dir.create(paste(drugerasFolder,"/",sep="")) - } - - progressBar <- utils::txtProgressBar(style=3) - progress = 0 - - queryAgeAtFirstExposure <- SqlRender::loadRenderTranslateSql(sqlFilename = addCdmVersionPath("/drugera/sqlAgeAtFirstExposure.sql",cdmVersion), - packageName = "Achilles", - dbms = dbms, - cdm_database_schema = cdmDatabaseSchema, - results_database_schema = resultsDatabaseSchema, - vocab_database_schema = vocabDatabaseSchema - ) - - queryPrevalenceByGenderAgeYear <- SqlRender::loadRenderTranslateSql(sqlFilename = addCdmVersionPath("/drugera/sqlPrevalenceByGenderAgeYear.sql",cdmVersion), - packageName = "Achilles", - dbms = dbms, - cdm_database_schema = cdmDatabaseSchema, - results_database_schema = resultsDatabaseSchema, - vocab_database_schema = vocabDatabaseSchema - ) - - queryPrevalenceByMonth <- SqlRender::loadRenderTranslateSql(sqlFilename = addCdmVersionPath("/drugera/sqlPrevalenceByMonth.sql",cdmVersion), - packageName = "Achilles", - dbms = dbms, - cdm_database_schema = cdmDatabaseSchema, - results_database_schema = resultsDatabaseSchema, - vocab_database_schema = vocabDatabaseSchema - ) - - queryLengthOfEra <- SqlRender::loadRenderTranslateSql(sqlFilename = addCdmVersionPath("/drugera/sqlLengthOfEra.sql",cdmVersion), - packageName = "Achilles", - dbms = dbms, - cdm_database_schema = cdmDatabaseSchema, - results_database_schema = resultsDatabaseSchema, - vocab_database_schema = vocabDatabaseSchema - ) - - dataAgeAtFirstExposure <- DatabaseConnector::querySql(conn,queryAgeAtFirstExposure) - dataPrevalenceByGenderAgeYear <- DatabaseConnector::querySql(conn,queryPrevalenceByGenderAgeYear) - dataPrevalenceByMonth <- DatabaseConnector::querySql(conn,queryPrevalenceByMonth) - dataLengthOfEra <- DatabaseConnector::querySql(conn,queryLengthOfEra) - - buildDrugEraReport <- function(concept_id) { - report <- {} - report$AGE_AT_FIRST_EXPOSURE <- dataAgeAtFirstExposure[dataAgeAtFirstExposure$CONCEPT_ID == concept_id,c(2,3,4,5,6,7,8,9)] - report$PREVALENCE_BY_GENDER_AGE_YEAR <- dataPrevalenceByGenderAgeYear[dataPrevalenceByGenderAgeYear$CONCEPT_ID == concept_id,c(2,3,4,5)] - report$PREVALENCE_BY_MONTH <- dataPrevalenceByMonth[dataPrevalenceByMonth$CONCEPT_ID == concept_id,c(2,3)] - report$LENGTH_OF_ERA <- dataLengthOfEra[dataLengthOfEra$CONCEPT_ID == concept_id, c(2,3,4,5,6,7,8,9)] - - filename <- paste(outputPath, "/drugeras/drug_" , concept_id , ".json", sep='') - - write(rjson::toJSON(report,method="C"),filename) - - #Update progressbar: - env <- parent.env(environment()) - curVal <- get("progress", envir = env) - assign("progress", curVal +1 ,envir= env) - utils::setTxtProgressBar(get("progressBar", envir= env), (curVal + 1) / get("totalCount", envir= env)) - } - - dummy <- lapply(uniqueConcepts, buildDrugEraReport) - - utils::setTxtProgressBar(progressBar, 1) - close(progressBar) -} - -generateDrugReports <- function(conn, dbms, 
cdmDatabaseSchema, resultsDatabaseSchema, outputPath, cdmVersion = "4", vocabDatabaseSchema = cdmDatabaseSchema) { - writeLines("Generating drug reports") - - treemapFile <- file.path(outputPath,"drug_treemap.json") - if (!file.exists(treemapFile)){ - writeLines(paste("Warning: treemap file",treemapFile,"does not exist. Skipping detail report generation.")) - return() - } - - treemapData <- rjson::fromJSON(file = treemapFile) - uniqueConcepts <- unique(treemapData$CONCEPT_ID) - totalCount <- length(uniqueConcepts) - - drugsFolder <- file.path(outputPath,"drugs") - if (file.exists(drugsFolder)){ - writeLines(paste("Warning: folder ",drugsFolder," already exists")) - } else { - dir.create(paste(drugsFolder,"/",sep="")) - } - - progressBar <- utils::txtProgressBar(style=3) - progress = 0 - - queryAgeAtFirstExposure <- SqlRender::loadRenderTranslateSql(sqlFilename = addCdmVersionPath("/drug/sqlAgeAtFirstExposure.sql",cdmVersion), - packageName = "Achilles", - dbms = dbms, - cdm_database_schema = cdmDatabaseSchema, - results_database_schema = resultsDatabaseSchema, - vocab_database_schema = vocabDatabaseSchema - ) - - queryDaysSupplyDistribution <- SqlRender::loadRenderTranslateSql(sqlFilename = addCdmVersionPath("/drug/sqlDaysSupplyDistribution.sql",cdmVersion), - packageName = "Achilles", - dbms = dbms, - cdm_database_schema = cdmDatabaseSchema, - results_database_schema = resultsDatabaseSchema, - vocab_database_schema = vocabDatabaseSchema - ) - - queryDrugsByType <- SqlRender::loadRenderTranslateSql(sqlFilename = addCdmVersionPath("/drug/sqlDrugsByType.sql",cdmVersion), - packageName = "Achilles", - dbms = dbms, - cdm_database_schema = cdmDatabaseSchema, - results_database_schema = resultsDatabaseSchema, - vocab_database_schema = vocabDatabaseSchema - ) - - queryPrevalenceByGenderAgeYear <- SqlRender::loadRenderTranslateSql(sqlFilename = addCdmVersionPath("/drug/sqlPrevalenceByGenderAgeYear.sql",cdmVersion), - packageName = "Achilles", - dbms = dbms, - cdm_database_schema = cdmDatabaseSchema, - results_database_schema = resultsDatabaseSchema, - vocab_database_schema = vocabDatabaseSchema - ) - - queryPrevalenceByMonth <- SqlRender::loadRenderTranslateSql(sqlFilename = addCdmVersionPath("/drug/sqlPrevalenceByMonth.sql",cdmVersion), - packageName = "Achilles", - dbms = dbms, - cdm_database_schema = cdmDatabaseSchema, - results_database_schema = resultsDatabaseSchema, - vocab_database_schema = vocabDatabaseSchema - ) - - queryDrugFrequencyDistribution <- SqlRender::loadRenderTranslateSql(sqlFilename = addCdmVersionPath("/drug/sqlFrequencyDistribution.sql",cdmVersion), - packageName = "Achilles", - dbms = dbms, - cdm_database_schema = cdmDatabaseSchema, - results_database_schema = resultsDatabaseSchema, - vocab_database_schema = vocabDatabaseSchema - ) - - queryQuantityDistribution <- SqlRender::loadRenderTranslateSql(sqlFilename = addCdmVersionPath("/drug/sqlQuantityDistribution.sql",cdmVersion), - packageName = "Achilles", - dbms = dbms, - cdm_database_schema = cdmDatabaseSchema, - results_database_schema = resultsDatabaseSchema, - vocab_database_schema = vocabDatabaseSchema - ) - - queryRefillsDistribution <- SqlRender::loadRenderTranslateSql(sqlFilename = addCdmVersionPath("/drug/sqlRefillsDistribution.sql",cdmVersion), - packageName = "Achilles", - dbms = dbms, - cdm_database_schema = cdmDatabaseSchema, - results_database_schema = resultsDatabaseSchema, - vocab_database_schema = vocabDatabaseSchema - ) - - dataAgeAtFirstExposure <- DatabaseConnector::querySql(conn,queryAgeAtFirstExposure) 
- dataDaysSupplyDistribution <- DatabaseConnector::querySql(conn,queryDaysSupplyDistribution) - dataDrugsByType <- DatabaseConnector::querySql(conn,queryDrugsByType) - dataPrevalenceByGenderAgeYear <- DatabaseConnector::querySql(conn,queryPrevalenceByGenderAgeYear) - dataPrevalenceByMonth <- DatabaseConnector::querySql(conn,queryPrevalenceByMonth) - dataQuantityDistribution <- DatabaseConnector::querySql(conn,queryQuantityDistribution) - dataRefillsDistribution <- DatabaseConnector::querySql(conn,queryRefillsDistribution) - dataDrugFrequencyDistribution <- DatabaseConnector::querySql(conn,queryDrugFrequencyDistribution) - - buildDrugReport <- function(concept_id) { - report <- {} - report$AGE_AT_FIRST_EXPOSURE <- dataAgeAtFirstExposure[dataAgeAtFirstExposure$DRUG_CONCEPT_ID == concept_id,c(2,3,4,5,6,7,8,9)] - report$DAYS_SUPPLY_DISTRIBUTION <- dataDaysSupplyDistribution[dataDaysSupplyDistribution$DRUG_CONCEPT_ID == concept_id, c(2,3,4,5,6,7,8,9)] - report$DRUGS_BY_TYPE <- dataDrugsByType[dataDrugsByType$DRUG_CONCEPT_ID == concept_id, c(3,4)] - report$PREVALENCE_BY_GENDER_AGE_YEAR <- dataPrevalenceByGenderAgeYear[dataPrevalenceByGenderAgeYear$CONCEPT_ID == concept_id,c(3,4,5,6)] - report$PREVALENCE_BY_MONTH <- dataPrevalenceByMonth[dataPrevalenceByMonth$CONCEPT_ID == concept_id,c(3,4)] - report$DRUG_FREQUENCY_DISTRIBUTION <- dataDrugFrequencyDistribution[dataDrugFrequencyDistribution$CONCEPT_ID == concept_id,c(3,4)] - report$QUANTITY_DISTRIBUTION <- dataQuantityDistribution[dataQuantityDistribution$DRUG_CONCEPT_ID == concept_id, c(2,3,4,5,6,7,8,9)] - report$REFILLS_DISTRIBUTION <- dataRefillsDistribution[dataRefillsDistribution$DRUG_CONCEPT_ID == concept_id, c(2,3,4,5,6,7,8,9)] - - filename <- paste(outputPath, "/drugs/drug_" , concept_id , ".json", sep='') - - write(rjson::toJSON(report,method="C"),filename) - - #Update progressbar: - env <- parent.env(environment()) - curVal <- get("progress", envir = env) - assign("progress", curVal +1 ,envir= env) - utils::setTxtProgressBar(get("progressBar", envir= env), (curVal + 1) / get("totalCount", envir= env)) - } - - dummy <- lapply(uniqueConcepts, buildDrugReport) - - utils::setTxtProgressBar(progressBar, 1) - close(progressBar) -} - -generateProcedureTreemap <- function(conn, dbms, cdmDatabaseSchema, resultsDatabaseSchema, outputPath, cdmVersion = "4", vocabDatabaseSchema = cdmDatabaseSchema) { - writeLines("Generating procedure treemap") - progressBar <- utils::txtProgressBar(max=1,style=3) - progress = 0 - - queryProcedureTreemap <- SqlRender::loadRenderTranslateSql(sqlFilename = addCdmVersionPath("/procedure/sqlProcedureTreemap.sql",cdmVersion), - packageName = "Achilles", - dbms = dbms, - cdm_database_schema = cdmDatabaseSchema, - results_database_schema = resultsDatabaseSchema, - vocab_database_schema = vocabDatabaseSchema - ) - - dataProcedureTreemap <- DatabaseConnector::querySql(conn,queryProcedureTreemap) - - write(rjson::toJSON(dataProcedureTreemap,method="C"),paste(outputPath, "/procedure_treemap.json", sep='')) - progress = progress + 1 - utils::setTxtProgressBar(progressBar, progress) - - close(progressBar) -} - -generateProcedureReports <- function(conn, dbms, cdmDatabaseSchema, resultsDatabaseSchema, outputPath, cdmVersion = "4", vocabDatabaseSchema = cdmDatabaseSchema) { - writeLines("Generating procedure reports") - - treemapFile <- file.path(outputPath,"procedure_treemap.json") - if (!file.exists(treemapFile)){ - writeLines(paste("Warning: treemap file",treemapFile,"does not exist. 
Skipping detail report generation.")) - return() - } - - treemapData <- rjson::fromJSON(file = treemapFile) - uniqueConcepts <- unique(treemapData$CONCEPT_ID) - totalCount <- length(uniqueConcepts) - - proceduresFolder <- file.path(outputPath,"procedures") - if (file.exists(proceduresFolder)){ - writeLines(paste("Warning: folder ",proceduresFolder," already exists")) - } else { - dir.create(paste(proceduresFolder,"/",sep="")) - - } - - progressBar <- utils::txtProgressBar(style=3) - progress = 0 - - queryPrevalenceByGenderAgeYear <- SqlRender::loadRenderTranslateSql(sqlFilename = addCdmVersionPath("/procedure/sqlPrevalenceByGenderAgeYear.sql",cdmVersion), - packageName = "Achilles", - dbms = dbms, - cdm_database_schema = cdmDatabaseSchema, - results_database_schema = resultsDatabaseSchema, - vocab_database_schema = vocabDatabaseSchema - ) - - queryPrevalenceByMonth <- SqlRender::loadRenderTranslateSql(sqlFilename = addCdmVersionPath("/procedure/sqlPrevalenceByMonth.sql",cdmVersion), - packageName = "Achilles", - dbms = dbms, - cdm_database_schema = cdmDatabaseSchema, - results_database_schema = resultsDatabaseSchema, - vocab_database_schema = vocabDatabaseSchema - ) - - queryProcedureFrequencyDistribution <- SqlRender::loadRenderTranslateSql(sqlFilename = addCdmVersionPath("/procedure/sqlFrequencyDistribution.sql",cdmVersion), - packageName = "Achilles", - dbms = dbms, - cdm_database_schema = cdmDatabaseSchema, - results_database_schema = resultsDatabaseSchema, - vocab_database_schema = vocabDatabaseSchema - ) - - queryProceduresByType <- SqlRender::loadRenderTranslateSql(sqlFilename = addCdmVersionPath("/procedure/sqlProceduresByType.sql",cdmVersion), - packageName = "Achilles", - dbms = dbms, - cdm_database_schema = cdmDatabaseSchema, - results_database_schema = resultsDatabaseSchema, - vocab_database_schema = vocabDatabaseSchema - ) - - queryAgeAtFirstOccurrence <- SqlRender::loadRenderTranslateSql(sqlFilename = addCdmVersionPath("/procedure/sqlAgeAtFirstOccurrence.sql",cdmVersion), - packageName = "Achilles", - dbms = dbms, - cdm_database_schema = cdmDatabaseSchema, - results_database_schema = resultsDatabaseSchema, - vocab_database_schema = vocabDatabaseSchema - ) - - dataPrevalenceByGenderAgeYear <- DatabaseConnector::querySql(conn,queryPrevalenceByGenderAgeYear) - dataPrevalenceByMonth <- DatabaseConnector::querySql(conn,queryPrevalenceByMonth) - dataProceduresByType <- DatabaseConnector::querySql(conn,queryProceduresByType) - dataAgeAtFirstOccurrence <- DatabaseConnector::querySql(conn,queryAgeAtFirstOccurrence) - dataProcedureFrequencyDistribution <- DatabaseConnector::querySql(conn,queryProcedureFrequencyDistribution) - - buildProcedureReport <- function(concept_id) { - report <- {} - report$PREVALENCE_BY_GENDER_AGE_YEAR <- dataPrevalenceByGenderAgeYear[dataPrevalenceByGenderAgeYear$CONCEPT_ID == concept_id,c(3,4,5,6)] - report$PREVALENCE_BY_MONTH <- dataPrevalenceByMonth[dataPrevalenceByMonth$CONCEPT_ID == concept_id,c(3,4)] - report$PROCEDURE_FREQUENCY_DISTRIBUTION <- dataProcedureFrequencyDistribution[dataProcedureFrequencyDistribution$CONCEPT_ID == concept_id,c(3,4)] - report$PROCEDURES_BY_TYPE <- dataProceduresByType[dataProceduresByType$PROCEDURE_CONCEPT_ID == concept_id,c(4,5)] - report$AGE_AT_FIRST_OCCURRENCE <- dataAgeAtFirstOccurrence[dataAgeAtFirstOccurrence$CONCEPT_ID == concept_id,c(2,3,4,5,6,7,8,9)] - filename <- paste(outputPath, "/procedures/procedure_" , concept_id , ".json", sep='') - - write(rjson::toJSON(report,method="C"),filename) - - #Update progressbar: - 
env <- parent.env(environment()) - curVal <- get("progress", envir = env) - assign("progress", curVal +1 ,envir= env) - utils::setTxtProgressBar(get("progressBar", envir= env), (curVal + 1) / get("totalCount", envir= env)) - } - - dummy <- lapply(uniqueConcepts, buildProcedureReport) - - utils::setTxtProgressBar(progressBar, 1) - close(progressBar) -} - -generatePersonReport <- function(conn, dbms, cdmDatabaseSchema, resultsDatabaseSchema, outputPath, cdmVersion = "4", vocabDatabaseSchema = cdmDatabaseSchema) -{ - writeLines("Generating person reports") - progressBar <- utils::txtProgressBar(max=7,style=3) - progress = 0 - output = {} - - # 1. Title: Population - # a. Visualization: Table - # b. Row #1: CDM source name - # c. Row #2: # of persons - - renderedSql <- SqlRender::loadRenderTranslateSql(sqlFilename = addCdmVersionPath("/person/population.sql",cdmVersion), - packageName = "Achilles", - dbms = dbms, - cdm_database_schema = cdmDatabaseSchema, - results_database_schema = resultsDatabaseSchema, - vocab_database_schema = vocabDatabaseSchema - ) - - personSummaryData <- DatabaseConnector::querySql(conn,renderedSql) - progress = progress + 1 - utils::setTxtProgressBar(progressBar, progress) - - output$SUMMARY = personSummaryData - - # 2. Title: Gender distribution - # a. Visualization: Pie - # b. Category: Gender - # c. Value: % of persons - - renderedSql <- SqlRender::loadRenderTranslateSql(sqlFilename = addCdmVersionPath("/person/gender.sql",cdmVersion), - packageName = "Achilles", - dbms = dbms, - cdm_database_schema = cdmDatabaseSchema, - results_database_schema = resultsDatabaseSchema, - vocab_database_schema = vocabDatabaseSchema - ) - genderData <- DatabaseConnector::querySql(conn,renderedSql) - progress = progress + 1 - utils::setTxtProgressBar(progressBar, progress) - - output$GENDER_DATA = genderData - - # 3. Title: Race distribution - # a. Visualization: Pie - # b. Category: Race - # c. Value: % of persons - - renderedSql <- SqlRender::loadRenderTranslateSql(sqlFilename = addCdmVersionPath("/person/race.sql",cdmVersion), - packageName = "Achilles", - dbms = dbms, - cdm_database_schema = cdmDatabaseSchema, - results_database_schema = resultsDatabaseSchema, - vocab_database_schema = vocabDatabaseSchema - ) - raceData <- DatabaseConnector::querySql(conn,renderedSql) - progress = progress + 1 - utils::setTxtProgressBar(progressBar, progress) - - output$RACE_DATA = raceData - - # 4. Title: Ethnicity distribution - # a. Visualization: Pie - # b. Category: Ethnicity - # c. Value: % of persons - - renderedSql <- SqlRender::loadRenderTranslateSql(sqlFilename = addCdmVersionPath("/person/ethnicity.sql",cdmVersion), - packageName = "Achilles", - dbms = dbms, - cdm_database_schema = cdmDatabaseSchema, - results_database_schema = resultsDatabaseSchema, - vocab_database_schema = vocabDatabaseSchema - ) - ethnicityData <- DatabaseConnector::querySql(conn,renderedSql) - progress = progress + 1 - utils::setTxtProgressBar(progressBar, progress) - - output$ETHNICITY_DATA = ethnicityData - - # 5. Title: Year of birth distribution - # a. Visualization: Histogram - # b. Category: Year of birth - # c. 
Value: # of persons - birthYearHist <- {} - - renderedSql <- SqlRender::loadRenderTranslateSql(sqlFilename = addCdmVersionPath("/person/yearofbirth_stats.sql",cdmVersion), - packageName = "Achilles", - dbms = dbms, - cdm_database_schema = cdmDatabaseSchema, - results_database_schema = resultsDatabaseSchema, - vocab_database_schema = vocabDatabaseSchema - ) - birthYearStats <- DatabaseConnector::querySql(conn,renderedSql) - progress = progress + 1 - utils::setTxtProgressBar(progressBar, progress) - - birthYearHist$MIN = birthYearStats$MIN_VALUE - birthYearHist$MAX = birthYearStats$MAX_VALUE - birthYearHist$INTERVAL_SIZE = birthYearStats$INTERVAL_SIZE - birthYearHist$INTERVALS = (birthYearStats$MAX_VALUE - birthYearStats$MIN_VALUE) / birthYearStats$INTERVAL_SIZE - - renderedSql <- SqlRender::loadRenderTranslateSql(sqlFilename = addCdmVersionPath("/person/yearofbirth_data.sql",cdmVersion), - packageName = "Achilles", - dbms = dbms, - cdm_database_schema = cdmDatabaseSchema, - results_database_schema = resultsDatabaseSchema, - vocab_database_schema = vocabDatabaseSchema - ) - birthYearData <- DatabaseConnector::querySql(conn,renderedSql) - progress = progress + 1 - utils::setTxtProgressBar(progressBar, progress) - - birthYearHist$DATA <- birthYearData - - output$BIRTH_YEAR_HISTOGRAM <- birthYearHist - - # Convert to JSON and save file result - jsonOutput = rjson::toJSON(output) - write(jsonOutput, file=paste(outputPath, "/person.json", sep="")) - progress = progress + 1 - utils::setTxtProgressBar(progressBar, progress) - - close(progressBar) -} - -generateObservationPeriodReport <- function(conn, dbms, cdmDatabaseSchema, resultsDatabaseSchema, outputPath, cdmVersion = "4", vocabDatabaseSchema = cdmDatabaseSchema) -{ - writeLines("Generating observation period reports") - progressBar <- utils::txtProgressBar(max=11,style=3) - progress = 0 - output = {} - - # 1. Title: Age at time of first observation - # a. Visualization: Histogram - # b. Category: Age - # c. Value: # of persons - - ageAtFirstObservationHist <- {} - - # stats are hard coded for this result to make x-axis consistent across datasources - ageAtFirstObservationHist$MIN = 0 - ageAtFirstObservationHist$MAX =100 - ageAtFirstObservationHist$INTERVAL_SIZE = 1 - ageAtFirstObservationHist$INTERVALS = 100 - - renderedSql <- SqlRender::loadRenderTranslateSql(sqlFilename = addCdmVersionPath("/observationperiod/ageatfirst.sql",cdmVersion), - packageName = "Achilles", - dbms = dbms, - cdm_database_schema = cdmDatabaseSchema, - results_database_schema = resultsDatabaseSchema, - vocab_database_schema = vocabDatabaseSchema - ) - ageAtFirstObservationData <- DatabaseConnector::querySql(conn,renderedSql) - progress = progress + 1 - utils::setTxtProgressBar(progressBar, progress) - ageAtFirstObservationHist$DATA = ageAtFirstObservationData - output$AGE_AT_FIRST_OBSERVATION_HISTOGRAM <- ageAtFirstObservationHist - - # 2. Title: Age by gender - # a. Visualization: Side-by-side boxplot - # b. Category: Gender - # c. 
Values: Min/25%/Median/95%/Max - age at time of first observation - - renderedSql <- SqlRender::loadRenderTranslateSql(sqlFilename = addCdmVersionPath("/observationperiod/agebygender.sql",cdmVersion), - packageName = "Achilles", - dbms = dbms, - cdm_database_schema = cdmDatabaseSchema, - results_database_schema = resultsDatabaseSchema, - vocab_database_schema = vocabDatabaseSchema - ) - ageByGenderData <- DatabaseConnector::querySql(conn,renderedSql) - progress = progress + 1 - utils::setTxtProgressBar(progressBar, progress) - output$AGE_BY_GENDER = ageByGenderData - - # 3. Title: Length of observation - # a. Visualization: bar - # b. Category: length of observation period, 30d increments - # c. Values: # of persons - - observationLengthHist <- {} - - renderedSql <- SqlRender::loadRenderTranslateSql(sqlFilename = addCdmVersionPath("/observationperiod/observationlength_stats.sql",cdmVersion), - packageName = "Achilles", - dbms = dbms, - cdm_database_schema = cdmDatabaseSchema, - results_database_schema = resultsDatabaseSchema, - vocab_database_schema = vocabDatabaseSchema - ) - - observationLengthStats <- DatabaseConnector::querySql(conn,renderedSql) - progress = progress + 1 - utils::setTxtProgressBar(progressBar, progress) - observationLengthHist$MIN = observationLengthStats$MIN_VALUE - observationLengthHist$MAX = observationLengthStats$MAX_VALUE - observationLengthHist$INTERVAL_SIZE = observationLengthStats$INTERVAL_SIZE - observationLengthHist$INTERVALS = (observationLengthStats$MAX_VALUE - observationLengthStats$MIN_VALUE) / observationLengthStats$INTERVAL_SIZE - - renderedSql <- SqlRender::loadRenderTranslateSql(sqlFilename = addCdmVersionPath("/observationperiod/observationlength_data.sql",cdmVersion), - packageName = "Achilles", - dbms = dbms, - cdm_database_schema = cdmDatabaseSchema, - results_database_schema = resultsDatabaseSchema, - vocab_database_schema = vocabDatabaseSchema - ) - observationLengthData <- DatabaseConnector::querySql(conn,renderedSql) - progress = progress + 1 - utils::setTxtProgressBar(progressBar, progress) - observationLengthHist$DATA <- observationLengthData - - output$OBSERVATION_LENGTH_HISTOGRAM = observationLengthHist - - # 4. Title: Cumulative duration of observation - # a. Visualization: scatterplot - # b. X-axis: length of observation period - # c. Y-axis: % of population observed - # d. Note: will look like a Kaplan-Meier ‘survival’ plot, but information is the same as shown in ‘length of observation’ barchart, just plotted as cumulative - - renderedSql <- SqlRender::loadRenderTranslateSql(sqlFilename = addCdmVersionPath("/observationperiod/cumulativeduration.sql",cdmVersion), - packageName = "Achilles", - dbms = dbms, - cdm_database_schema = cdmDatabaseSchema, - results_database_schema = resultsDatabaseSchema, - vocab_database_schema = vocabDatabaseSchema - ) - - cumulativeDurationData <- DatabaseConnector::querySql(conn,renderedSql) - progress = progress + 1 - utils::setTxtProgressBar(progressBar, progress) - output$CUMULATIVE_DURATION = cumulativeDurationData - - # 5. Title: Observation period length distribution, by gender - # a. Visualization: side-by-side boxplot - # b. Category: Gender - # c. 
Values: Min/25%/Median/95%/Max length of observation period - - renderedSql <- SqlRender::loadRenderTranslateSql(sqlFilename = addCdmVersionPath("/observationperiod/observationlengthbygender.sql",cdmVersion), - packageName = "Achilles", - dbms = dbms, - cdm_database_schema = cdmDatabaseSchema, - results_database_schema = resultsDatabaseSchema, - vocab_database_schema = vocabDatabaseSchema - ) - opLengthByGenderData <- DatabaseConnector::querySql(conn,renderedSql) - progress = progress + 1 - utils::setTxtProgressBar(progressBar, progress) - output$OBSERVATION_PERIOD_LENGTH_BY_GENDER = opLengthByGenderData - - # 6. Title: Observation period length distribution, by age - # a. Visualization: side-by-side boxplot - # b. Category: Age decile - # c. Values: Min/25%/Median/95%/Max length of observation period - - renderedSql <- SqlRender::loadRenderTranslateSql(sqlFilename = addCdmVersionPath("/observationperiod/observationlengthbyage.sql",cdmVersion), - packageName = "Achilles", - dbms = dbms, - cdm_database_schema = cdmDatabaseSchema, - results_database_schema = resultsDatabaseSchema, - vocab_database_schema = vocabDatabaseSchema - ) - opLengthByAgeData <- DatabaseConnector::querySql(conn,renderedSql) - progress = progress + 1 - utils::setTxtProgressBar(progressBar, progress) - output$OBSERVATION_PERIOD_LENGTH_BY_AGE = opLengthByAgeData - - # 7. Title: Number of persons with continuous observation by year - # a. Visualization: Histogram - # b. Category: Year - # c. Values: # of persons with continuous coverage - - observedByYearHist <- {} - renderedSql <- SqlRender::loadRenderTranslateSql(sqlFilename = addCdmVersionPath("/observationperiod/observedbyyear_stats.sql",cdmVersion), - packageName = "Achilles", - dbms = dbms, - cdm_database_schema = cdmDatabaseSchema, - results_database_schema = resultsDatabaseSchema, - vocab_database_schema = vocabDatabaseSchema - ) - observedByYearStats <- DatabaseConnector::querySql(conn,renderedSql) - progress = progress + 1 - utils::setTxtProgressBar(progressBar, progress) - observedByYearHist$MIN = observedByYearStats$MIN_VALUE - observedByYearHist$MAX = observedByYearStats$MAX_VALUE - observedByYearHist$INTERVAL_SIZE = observedByYearStats$INTERVAL_SIZE - observedByYearHist$INTERVALS = (observedByYearStats$MAX_VALUE - observedByYearStats$MIN_VALUE) / observedByYearStats$INTERVAL_SIZE - - renderedSql <- SqlRender::loadRenderTranslateSql(sqlFilename = addCdmVersionPath("/observationperiod/observedbyyear_data.sql",cdmVersion), - packageName = "Achilles", - dbms = dbms, - cdm_database_schema = cdmDatabaseSchema, - results_database_schema = resultsDatabaseSchema, - vocab_database_schema = vocabDatabaseSchema - ) - - observedByYearData <- DatabaseConnector::querySql(conn,renderedSql) - progress = progress + 1 - utils::setTxtProgressBar(progressBar, progress) - observedByYearHist$DATA <- observedByYearData - - output$OBSERVED_BY_YEAR_HISTOGRAM = observedByYearHist - - # 8. Title: Number of persons with continuous observation by month - # a. Visualization: Histogram - # b. Category: Month/year - # c. 
Values: # of persons with continuous coverage - - observedByMonth <- {} - - renderedSql <- SqlRender::loadRenderTranslateSql(sqlFilename = addCdmVersionPath("/observationperiod/observedbymonth.sql",cdmVersion), - packageName = "Achilles", - dbms = dbms, - cdm_database_schema = cdmDatabaseSchema, - results_database_schema = resultsDatabaseSchema, - vocab_database_schema = vocabDatabaseSchema - ) - observedByMonth <- DatabaseConnector::querySql(conn,renderedSql) - progress = progress + 1 - utils::setTxtProgressBar(progressBar, progress) - - output$OBSERVED_BY_MONTH = observedByMonth - - # 9. Title: Number of observation periods per person - # a. Visualization: Pie - # b. Category: Number of observation periods - # c. Values: # of persons - - renderedSql <- SqlRender::loadRenderTranslateSql(sqlFilename = addCdmVersionPath("/observationperiod/periodsperperson.sql",cdmVersion), - packageName = "Achilles", - dbms = dbms, - cdm_database_schema = cdmDatabaseSchema, - results_database_schema = resultsDatabaseSchema, - vocab_database_schema = vocabDatabaseSchema - ) - personPeriodsData <- DatabaseConnector::querySql(conn,renderedSql) - progress = progress + 1 - utils::setTxtProgressBar(progressBar, progress) - output$PERSON_PERIODS_DATA = personPeriodsData - - # Convert to JSON and save file result - jsonOutput = rjson::toJSON(output) - write(jsonOutput, file=paste(outputPath, "/observationperiod.json", sep="")) - close(progressBar) -} - -generateDashboardReport <- function(outputPath) -{ - writeLines("Generating dashboard report") - output <- {} - - progressBar <- utils::txtProgressBar(max=4,style=3) - progress = 0 - - progress = progress + 1 - utils::setTxtProgressBar(progressBar, progress) - - personReport <- rjson::fromJSON(file = paste(outputPath, "/person.json", sep="")) - output$SUMMARY <- personReport$SUMMARY - output$GENDER_DATA <- personReport$GENDER_DATA - - progress = progress + 1 - utils::setTxtProgressBar(progressBar, progress) - - opReport <- rjson::fromJSON(file = paste(outputPath, "/observationperiod.json", sep="")) - - output$AGE_AT_FIRST_OBSERVATION_HISTOGRAM = opReport$AGE_AT_FIRST_OBSERVATION_HISTOGRAM - output$CUMULATIVE_DURATION = opReport$CUMULATIVE_DURATION - output$OBSERVED_BY_MONTH = opReport$OBSERVED_BY_MONTH - - progress = progress + 1 - utils::setTxtProgressBar(progressBar, progress) - - jsonOutput = rjson::toJSON(output) - write(jsonOutput, file=paste(outputPath, "/dashboard.json", sep="")) - progress = progress + 1 - utils::setTxtProgressBar(progressBar, progress) - - close(progressBar) -} - -generateDataDensityReport <- function(conn, dbms,cdmDatabaseSchema, resultsDatabaseSchema, outputPath, cdmVersion = "4", vocabDatabaseSchema = cdmDatabaseSchema) -{ - writeLines("Generating data density reports") - progressBar <- utils::txtProgressBar(max=3,style=3) - progress = 0 - output = {} - - # 1. Title: Total records - # a. Visualization: scatterplot - # b. X-axis: month/year - # c. y-axis: records - # d. series: person, visit, condition, drug, procedure, observation - - renderedSql <- SqlRender::loadRenderTranslateSql(sqlFilename = addCdmVersionPath("/datadensity/totalrecords.sql",cdmVersion), - packageName = "Achilles", - dbms = dbms, - cdm_database_schema = cdmDatabaseSchema, - results_database_schema = resultsDatabaseSchema, - vocab_database_schema = vocabDatabaseSchema - ) - - totalRecordsData <- DatabaseConnector::querySql(conn,renderedSql) - progress = progress + 1 - utils::setTxtProgressBar(progressBar, progress) - output$TOTAL_RECORDS = totalRecordsData - - # 2. 
Title: Records per person - # a. Visualization: scatterplot - # b. X-axis: month/year - # c. y-axis: records/person - # d. series: person, visit, condition, drug, procedure, observation - - renderedSql <- SqlRender::loadRenderTranslateSql(sqlFilename = addCdmVersionPath("/datadensity/recordsperperson.sql",cdmVersion), - packageName = "Achilles", - dbms = dbms, - cdm_database_schema = cdmDatabaseSchema, - results_database_schema = resultsDatabaseSchema, - vocab_database_schema = vocabDatabaseSchema - ) - - recordsPerPerson <- DatabaseConnector::querySql(conn,renderedSql) - progress = progress + 1 - utils::setTxtProgressBar(progressBar, progress) - output$RECORDS_PER_PERSON = recordsPerPerson - - # 3. Title: Concepts per person - # a. Visualization: side-by-side boxplot - # b. Category: Condition/Drug/Procedure/Observation - # c. Values: Min/25%/Median/95%/Max number of distinct concepts per person - - renderedSql <- SqlRender::loadRenderTranslateSql(sqlFilename = addCdmVersionPath("/datadensity/conceptsperperson.sql",cdmVersion), - packageName = "Achilles", - dbms = dbms, - cdm_database_schema = cdmDatabaseSchema, - results_database_schema = resultsDatabaseSchema, - vocab_database_schema = vocabDatabaseSchema - ) - - conceptsPerPerson <- DatabaseConnector::querySql(conn,renderedSql) - progress = progress + 1 - utils::setTxtProgressBar(progressBar, progress) - output$CONCEPTS_PER_PERSON = conceptsPerPerson - - # Convert to JSON and save file result - jsonOutput = rjson::toJSON(output) - write(jsonOutput, file=paste(outputPath, "/datadensity.json", sep="")) - close(progressBar) - -} - -generateMeasurementTreemap <- function(conn, dbms, cdmDatabaseSchema, resultsDatabaseSchema, outputPath, cdmVersion = "4", vocabDatabaseSchema = cdmDatabaseSchema) { - writeLines("Generating measurement treemap") - progressBar <- utils::txtProgressBar(max=1,style=3) - progress = 0 - - queryMeasurementTreemap <- SqlRender::loadRenderTranslateSql(sqlFilename = addCdmVersionPath("/measurement/sqlMeasurementTreemap.sql",cdmVersion), - packageName = "Achilles", - dbms = dbms, - cdm_database_schema = cdmDatabaseSchema, - results_database_schema = resultsDatabaseSchema, - vocab_database_schema = vocabDatabaseSchema - ) - - dataMeasurementTreemap <- DatabaseConnector::querySql(conn,queryMeasurementTreemap) - - write(rjson::toJSON(dataMeasurementTreemap,method="C"),paste(outputPath, "/measurement_treemap.json", sep='')) - progress = progress + 1 - utils::setTxtProgressBar(progressBar, progress) - - close(progressBar) - -} - -generateMeasurementReports <- function(conn, dbms, cdmDatabaseSchema, resultsDatabaseSchema, outputPath, cdmVersion = "4", vocabDatabaseSchema = cdmDatabaseSchema) -{ - writeLines("Generating Measurement reports") - - treemapFile <- file.path(outputPath,"measurement_treemap.json") - if (!file.exists(treemapFile)){ - writeLines(paste("Warning: treemap file",treemapFile,"does not exist. 
Skipping detail report generation.")) - return() - } - - treemapData <- rjson::fromJSON(file = treemapFile) - uniqueConcepts <- unique(treemapData$CONCEPT_ID) - totalCount <- length(uniqueConcepts) - - measurementsFolder <- file.path(outputPath,"measurements") - if (file.exists(measurementsFolder)){ - writeLines(paste("Warning: folder ",measurementsFolder," already exists")) - } else { - dir.create(paste(measurementsFolder,"/",sep="")) - - } - - progressBar <- utils::txtProgressBar(style=3) - progress = 0 - - queryPrevalenceByGenderAgeYear <- SqlRender::loadRenderTranslateSql(sqlFilename = addCdmVersionPath("/measurement/sqlPrevalenceByGenderAgeYear.sql",cdmVersion), - packageName = "Achilles", - dbms = dbms, - cdm_database_schema = cdmDatabaseSchema, - results_database_schema = resultsDatabaseSchema, - vocab_database_schema = vocabDatabaseSchema - ) - - queryPrevalenceByMonth <- SqlRender::loadRenderTranslateSql(sqlFilename = addCdmVersionPath("/measurement/sqlPrevalenceByMonth.sql",cdmVersion), - packageName = "Achilles", - dbms = dbms, - cdm_database_schema = cdmDatabaseSchema, - results_database_schema = resultsDatabaseSchema, - vocab_database_schema = vocabDatabaseSchema - ) - - queryFrequencyDistribution <- SqlRender::loadRenderTranslateSql(sqlFilename = addCdmVersionPath("/measurement/sqlFrequencyDistribution.sql",cdmVersion), - packageName = "Achilles", - dbms = dbms, - cdm_database_schema = cdmDatabaseSchema, - results_database_schema = resultsDatabaseSchema, - vocab_database_schema = vocabDatabaseSchema - ) - - queryMeasurementsByType <- SqlRender::loadRenderTranslateSql(sqlFilename = addCdmVersionPath("/measurement/sqlMeasurementsByType.sql",cdmVersion), - packageName = "Achilles", - dbms = dbms, - cdm_database_schema = cdmDatabaseSchema, - results_database_schema = resultsDatabaseSchema, - vocab_database_schema = vocabDatabaseSchema - ) - - queryAgeAtFirstOccurrence <- SqlRender::loadRenderTranslateSql(sqlFilename = addCdmVersionPath("/measurement/sqlAgeAtFirstOccurrence.sql",cdmVersion), - packageName = "Achilles", - dbms = dbms, - cdm_database_schema = cdmDatabaseSchema, - results_database_schema = resultsDatabaseSchema, - vocab_database_schema = vocabDatabaseSchema - ) - - queryRecordsByUnit <- SqlRender::loadRenderTranslateSql(sqlFilename = addCdmVersionPath("/measurement/sqlRecordsByUnit.sql",cdmVersion), - packageName = "Achilles", - dbms = dbms, - cdm_database_schema = cdmDatabaseSchema, - results_database_schema = resultsDatabaseSchema, - vocab_database_schema = vocabDatabaseSchema - ) - - queryMeasurementValueDistribution <- SqlRender::loadRenderTranslateSql(sqlFilename = addCdmVersionPath("/measurement/sqlMeasurementValueDistribution.sql",cdmVersion), - packageName = "Achilles", - dbms = dbms, - cdm_database_schema = cdmDatabaseSchema, - results_database_schema = resultsDatabaseSchema, - vocab_database_schema = vocabDatabaseSchema - ) - - queryLowerLimitDistribution <- SqlRender::loadRenderTranslateSql(sqlFilename = addCdmVersionPath("/measurement/sqlLowerLimitDistribution.sql",cdmVersion), - packageName = "Achilles", - dbms = dbms, - cdm_database_schema = cdmDatabaseSchema, - results_database_schema = resultsDatabaseSchema, - vocab_database_schema = vocabDatabaseSchema - ) - - queryUpperLimitDistribution <- SqlRender::loadRenderTranslateSql(sqlFilename = addCdmVersionPath("/measurement/sqlUpperLimitDistribution.sql",cdmVersion), - packageName = "Achilles", - dbms = dbms, - cdm_database_schema = cdmDatabaseSchema, - results_database_schema = resultsDatabaseSchema, - 
vocab_database_schema = vocabDatabaseSchema - ) - - queryValuesRelativeToNorm <- SqlRender::loadRenderTranslateSql(sqlFilename = addCdmVersionPath("/measurement/sqlValuesRelativeToNorm.sql",cdmVersion), - packageName = "Achilles", - dbms = dbms, - cdm_database_schema = cdmDatabaseSchema, - results_database_schema = resultsDatabaseSchema, - vocab_database_schema = vocabDatabaseSchema - ) - - dataPrevalenceByGenderAgeYear <- DatabaseConnector::querySql(conn,queryPrevalenceByGenderAgeYear) - dataPrevalenceByMonth <- DatabaseConnector::querySql(conn,queryPrevalenceByMonth) - dataMeasurementsByType <- DatabaseConnector::querySql(conn,queryMeasurementsByType) - dataAgeAtFirstOccurrence <- DatabaseConnector::querySql(conn,queryAgeAtFirstOccurrence) - dataRecordsByUnit <- DatabaseConnector::querySql(conn,queryRecordsByUnit) - dataMeasurementValueDistribution <- DatabaseConnector::querySql(conn,queryMeasurementValueDistribution) - dataLowerLimitDistribution <- DatabaseConnector::querySql(conn,queryLowerLimitDistribution) - dataUpperLimitDistribution <- DatabaseConnector::querySql(conn,queryUpperLimitDistribution) - dataValuesRelativeToNorm <- DatabaseConnector::querySql(conn,queryValuesRelativeToNorm) - dataFrequencyDistribution <- DatabaseConnector::querySql(conn,queryFrequencyDistribution) - - buildMeasurementReport <- function(concept_id) { - report <- {} - report$PREVALENCE_BY_GENDER_AGE_YEAR <- dataPrevalenceByGenderAgeYear[dataPrevalenceByGenderAgeYear$CONCEPT_ID == concept_id,c(3,4,5,6)] - report$PREVALENCE_BY_MONTH <- dataPrevalenceByMonth[dataPrevalenceByMonth$CONCEPT_ID == concept_id,c(3,4)] - report$FREQUENCY_DISTRIBUTION <- dataFrequencyDistribution[dataFrequencyDistribution$CONCEPT_ID == concept_id,c(3,4)] - report$MEASUREMENTS_BY_TYPE <- dataMeasurementsByType[dataMeasurementsByType$MEASUREMENT_CONCEPT_ID == concept_id,c(4,5)] - report$AGE_AT_FIRST_OCCURRENCE <- dataAgeAtFirstOccurrence[dataAgeAtFirstOccurrence$CONCEPT_ID == concept_id,c(2,3,4,5,6,7,8,9)] - - report$RECORDS_BY_UNIT <- dataRecordsByUnit[dataRecordsByUnit$MEASUREMENT_CONCEPT_ID == concept_id,c(4,5)] - report$MEASUREMENT_VALUE_DISTRIBUTION <- dataMeasurementValueDistribution[dataMeasurementValueDistribution$CONCEPT_ID == concept_id,c(2,3,4,5,6,7,8,9)] - report$LOWER_LIMIT_DISTRIBUTION <- dataLowerLimitDistribution[dataLowerLimitDistribution$CONCEPT_ID == concept_id,c(2,3,4,5,6,7,8,9)] - report$UPPER_LIMIT_DISTRIBUTION <- dataUpperLimitDistribution[dataUpperLimitDistribution$CONCEPT_ID == concept_id,c(2,3,4,5,6,7,8,9)] - report$VALUES_RELATIVE_TO_NORM <- dataValuesRelativeToNorm[dataValuesRelativeToNorm$MEASUREMENT_CONCEPT_ID == concept_id,c(4,5)] - - filename <- paste(outputPath, "/measurements/measurement_" , concept_id , ".json", sep='') - - write(rjson::toJSON(report,method="C"),filename) - - #Update progressbar: - env <- parent.env(environment()) - curVal <- get("progress", envir = env) - assign("progress", curVal +1 ,envir= env) - utils::setTxtProgressBar(get("progressBar", envir= env), (curVal + 1) / get("totalCount", envir= env)) - } - - dummy <- lapply(uniqueConcepts, buildMeasurementReport) - - utils::setTxtProgressBar(progressBar, 1) - close(progressBar) - -} - -generateObservationTreemap <- function(conn, dbms, cdmDatabaseSchema, resultsDatabaseSchema, outputPath, cdmVersion = "4", vocabDatabaseSchema = cdmDatabaseSchema) { - writeLines("Generating observation treemap") - progressBar <- utils::txtProgressBar(max=1,style=3) - progress = 0 - - queryObservationTreemap <- 
SqlRender::loadRenderTranslateSql(sqlFilename = addCdmVersionPath("/observation/sqlObservationTreemap.sql",cdmVersion), - packageName = "Achilles", - dbms = dbms, - cdm_database_schema = cdmDatabaseSchema, - results_database_schema = resultsDatabaseSchema, - vocab_database_schema = vocabDatabaseSchema - ) - - dataObservationTreemap <- DatabaseConnector::querySql(conn,queryObservationTreemap) - - write(rjson::toJSON(dataObservationTreemap,method="C"),paste(outputPath, "/observation_treemap.json", sep='')) - progress = progress + 1 - utils::setTxtProgressBar(progressBar, progress) - - close(progressBar) - -} - -generateObservationReports <- function(conn, dbms, cdmDatabaseSchema, resultsDatabaseSchema, outputPath, cdmVersion = "4", vocabDatabaseSchema = cdmDatabaseSchema) -{ - writeLines("Generating Observation reports") - - treemapFile <- file.path(outputPath,"observation_treemap.json") - if (!file.exists(treemapFile)){ - writeLines(paste("Warning: treemap file",treemapFile,"does not exist. Skipping detail report generation.")) - return() - } - - treemapData <- rjson::fromJSON(file = treemapFile) - uniqueConcepts <- unique(treemapData$CONCEPT_ID) - totalCount <- length(uniqueConcepts) - - observationsFolder <- file.path(outputPath,"observations") - if (file.exists(observationsFolder)){ - writeLines(paste("Warning: folder ",observationsFolder," already exists")) - } else { - dir.create(paste(observationsFolder,"/",sep="")) - - } - - progressBar <- utils::txtProgressBar(style=3) - progress = 0 - - queryPrevalenceByGenderAgeYear <- SqlRender::loadRenderTranslateSql(sqlFilename = addCdmVersionPath("/observation/sqlPrevalenceByGenderAgeYear.sql",cdmVersion), - packageName = "Achilles", - dbms = dbms, - cdm_database_schema = cdmDatabaseSchema, - results_database_schema = resultsDatabaseSchema, - vocab_database_schema = vocabDatabaseSchema - ) - - queryPrevalenceByMonth <- SqlRender::loadRenderTranslateSql(sqlFilename = addCdmVersionPath("/observation/sqlPrevalenceByMonth.sql",cdmVersion), - packageName = "Achilles", - dbms = dbms, - cdm_database_schema = cdmDatabaseSchema, - results_database_schema = resultsDatabaseSchema, - vocab_database_schema = vocabDatabaseSchema - ) - - queryObsFrequencyDistribution <- SqlRender::loadRenderTranslateSql(sqlFilename = addCdmVersionPath("/observation/sqlFrequencyDistribution.sql",cdmVersion), - packageName = "Achilles", - dbms = dbms, - cdm_database_schema = cdmDatabaseSchema, - results_database_schema = resultsDatabaseSchema, - vocab_database_schema = vocabDatabaseSchema - ) - - queryObservationsByType <- SqlRender::loadRenderTranslateSql(sqlFilename = addCdmVersionPath("/observation/sqlObservationsByType.sql",cdmVersion), - packageName = "Achilles", - dbms = dbms, - cdm_database_schema = cdmDatabaseSchema, - results_database_schema = resultsDatabaseSchema, - vocab_database_schema = vocabDatabaseSchema - ) - - queryAgeAtFirstOccurrence <- SqlRender::loadRenderTranslateSql(sqlFilename = addCdmVersionPath("/observation/sqlAgeAtFirstOccurrence.sql",cdmVersion), - packageName = "Achilles", - dbms = dbms, - cdm_database_schema = cdmDatabaseSchema, - results_database_schema = resultsDatabaseSchema, - vocab_database_schema = vocabDatabaseSchema - ) - - if (cdmVersion == "4") - { - - queryRecordsByUnit <- SqlRender::loadRenderTranslateSql(sqlFilename = addCdmVersionPath("/observation/sqlRecordsByUnit.sql",cdmVersion), - packageName = "Achilles", - dbms = dbms, - cdm_database_schema = cdmDatabaseSchema, - results_database_schema = resultsDatabaseSchema, - 
vocab_database_schema = vocabDatabaseSchema - ) - - queryObservationValueDistribution <- SqlRender::loadRenderTranslateSql(sqlFilename = addCdmVersionPath("/observation/sqlObservationValueDistribution.sql",cdmVersion), - packageName = "Achilles", - dbms = dbms, - cdm_database_schema = cdmDatabaseSchema, - results_database_schema = resultsDatabaseSchema, - vocab_database_schema = vocabDatabaseSchema - ) - - queryLowerLimitDistribution <- SqlRender::loadRenderTranslateSql(sqlFilename = addCdmVersionPath("/observation/sqlLowerLimitDistribution.sql",cdmVersion), - packageName = "Achilles", - dbms = dbms, - cdm_database_schema = cdmDatabaseSchema, - results_database_schema = resultsDatabaseSchema, - vocab_database_schema = vocabDatabaseSchema - ) - - queryUpperLimitDistribution <- SqlRender::loadRenderTranslateSql(sqlFilename = addCdmVersionPath("/observation/sqlUpperLimitDistribution.sql",cdmVersion), - packageName = "Achilles", - dbms = dbms, - cdm_database_schema = cdmDatabaseSchema, - results_database_schema = resultsDatabaseSchema, - vocab_database_schema = vocabDatabaseSchema - ) - - queryValuesRelativeToNorm <- SqlRender::loadRenderTranslateSql(sqlFilename = addCdmVersionPath("/observation/sqlValuesRelativeToNorm.sql",cdmVersion), - packageName = "Achilles", - dbms = dbms, - cdm_database_schema = cdmDatabaseSchema, - results_database_schema = resultsDatabaseSchema, - vocab_database_schema = vocabDatabaseSchema - ) - } - dataPrevalenceByGenderAgeYear <- DatabaseConnector::querySql(conn,queryPrevalenceByGenderAgeYear) - dataPrevalenceByMonth <- DatabaseConnector::querySql(conn,queryPrevalenceByMonth) - dataObservationsByType <- DatabaseConnector::querySql(conn,queryObservationsByType) - dataAgeAtFirstOccurrence <- DatabaseConnector::querySql(conn,queryAgeAtFirstOccurrence) - dataObsFrequencyDistribution <- DatabaseConnector::querySql(conn,queryObsFrequencyDistribution) - if (cdmVersion == "4") - { - dataRecordsByUnit <- DatabaseConnector::querySql(conn,queryRecordsByUnit) - dataObservationValueDistribution <- DatabaseConnector::querySql(conn,queryObservationValueDistribution) - dataLowerLimitDistribution <- DatabaseConnector::querySql(conn,queryLowerLimitDistribution) - dataUpperLimitDistribution <- DatabaseConnector::querySql(conn,queryUpperLimitDistribution) - dataValuesRelativeToNorm <- DatabaseConnector::querySql(conn,queryValuesRelativeToNorm) - } - - buildObservationReport <- function(concept_id) { - report <- {} - report$PREVALENCE_BY_GENDER_AGE_YEAR <- dataPrevalenceByGenderAgeYear[dataPrevalenceByGenderAgeYear$CONCEPT_ID == concept_id,c(3,4,5,6)] - report$PREVALENCE_BY_MONTH <- dataPrevalenceByMonth[dataPrevalenceByMonth$CONCEPT_ID == concept_id,c(3,4)] - report$OBS_FREQUENCY_DISTRIBUTION <- dataObsFrequencyDistribution[dataObsFrequencyDistribution$CONCEPT_ID == concept_id,c(3,4)] - report$OBSERVATIONS_BY_TYPE <- dataObservationsByType[dataObservationsByType$OBSERVATION_CONCEPT_ID == concept_id,c(4,5)] - report$AGE_AT_FIRST_OCCURRENCE <- dataAgeAtFirstOccurrence[dataAgeAtFirstOccurrence$CONCEPT_ID == concept_id,c(2,3,4,5,6,7,8,9)] - - if (cdmVersion == "4") - { - report$RECORDS_BY_UNIT <- dataRecordsByUnit[dataRecordsByUnit$OBSERVATION_CONCEPT_ID == concept_id,c(4,5)] - report$OBSERVATION_VALUE_DISTRIBUTION <- dataObservationValueDistribution[dataObservationValueDistribution$CONCEPT_ID == concept_id,c(2,3,4,5,6,7,8,9)] - report$LOWER_LIMIT_DISTRIBUTION <- dataLowerLimitDistribution[dataLowerLimitDistribution$CONCEPT_ID == concept_id,c(2,3,4,5,6,7,8,9)] - 
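# Editorial sketch (not part of the patch): a minimal, self-contained illustration of the
# per-concept drilldown pattern used by these report generators, assuming a prefetched
# data frame and an output folder (both hypothetical). Each measure is queried once for
# all concepts; the result is then sliced by CONCEPT_ID and written as one JSON file per
# concept, while a fractional progress bar owned by the enclosing function is advanced
# through its environment via the get/assign idiom shown in the surrounding code.
writePerConceptReports <- function(prevalenceByMonth, outputFolder) {
  uniqueConcepts <- unique(prevalenceByMonth$CONCEPT_ID)
  totalCount <- length(uniqueConcepts)
  progressBar <- utils::txtProgressBar(style = 3)
  progress <- 0

  buildOneReport <- function(concept_id) {
    report <- {}
    # keep only this concept's rows from the prefetched frame
    report$PREVALENCE_BY_MONTH <-
      prevalenceByMonth[prevalenceByMonth$CONCEPT_ID == concept_id, ]
    write(rjson::toJSON(report, method = "C"),
          file.path(outputFolder, paste0("concept_", concept_id, ".json")))
    # advance the progress bar that lives in the enclosing function's environment
    env <- parent.env(environment())
    curVal <- get("progress", envir = env)
    assign("progress", curVal + 1, envir = env)
    utils::setTxtProgressBar(get("progressBar", envir = env),
                             (curVal + 1) / get("totalCount", envir = env))
  }

  invisible(lapply(uniqueConcepts, buildOneReport))
  utils::setTxtProgressBar(progressBar, 1)
  close(progressBar)
}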
report$UPPER_LIMIT_DISTRIBUTION <- dataUpperLimitDistribution[dataUpperLimitDistribution$CONCEPT_ID == concept_id,c(2,3,4,5,6,7,8,9)] - report$VALUES_RELATIVE_TO_NORM <- dataValuesRelativeToNorm[dataValuesRelativeToNorm$OBSERVATION_CONCEPT_ID == concept_id,c(4,5)] - } - - filename <- paste(outputPath, "/observations/observation_" , concept_id , ".json", sep='') - - write(rjson::toJSON(report,method="C"),filename) - - #Update progressbar: - env <- parent.env(environment()) - curVal <- get("progress", envir = env) - assign("progress", curVal +1 ,envir= env) - utils::setTxtProgressBar(get("progressBar", envir= env), (curVal + 1) / get("totalCount", envir= env)) - } - - dummy <- lapply(uniqueConcepts, buildObservationReport) - - utils::setTxtProgressBar(progressBar, 1) - close(progressBar) - -} - -generateVisitTreemap <- function(conn, dbms, cdmDatabaseSchema, resultsDatabaseSchema, outputPath, cdmVersion = "4", vocabDatabaseSchema = cdmDatabaseSchema){ - writeLines("Generating visit_occurrence treemap") - progressBar <- utils::txtProgressBar(max=1,style=3) - progress = 0 - - queryVisitTreemap <- SqlRender::loadRenderTranslateSql(sqlFilename = addCdmVersionPath("/visit/sqlVisitTreemap.sql",cdmVersion), - packageName = "Achilles", - dbms = dbms, - cdm_database_schema = cdmDatabaseSchema, - results_database_schema = resultsDatabaseSchema, - vocab_database_schema = vocabDatabaseSchema - ) - - dataVisitTreemap <- DatabaseConnector::querySql(conn,queryVisitTreemap) - - write(rjson::toJSON(dataVisitTreemap,method="C"),paste(outputPath, "/visit_treemap.json", sep='')) - progress = progress + 1 - utils::setTxtProgressBar(progressBar, progress) - - close(progressBar) -} - -generateVisitReports <- function(conn, dbms, cdmDatabaseSchema, resultsDatabaseSchema, outputPath, cdmVersion = "4", vocabDatabaseSchema = cdmDatabaseSchema){ - writeLines("Generating visit reports") - - treemapFile <- file.path(outputPath,"visit_treemap.json") - if (!file.exists(treemapFile)){ - writeLines(paste("Warning: treemap file",treemapFile,"does not exist. 
Skipping detail report generation.")) - return() - } - - treemapData <- rjson::fromJSON(file = treemapFile) - uniqueConcepts <- unique(treemapData$CONCEPT_ID) - totalCount <- length(uniqueConcepts) - - visitsFolder <- file.path(outputPath,"visits") - if (file.exists(visitsFolder)){ - writeLines(paste("Warning: folder ",visitsFolder," already exists")) - } else { - dir.create(paste(visitsFolder,"/",sep="")) - - } - - progressBar <- utils::txtProgressBar(style=3) - progress = 0 - - queryPrevalenceByGenderAgeYear <- SqlRender::loadRenderTranslateSql(sqlFilename = addCdmVersionPath("/visit/sqlPrevalenceByGenderAgeYear.sql",cdmVersion), - packageName = "Achilles", - dbms = dbms, - cdm_database_schema = cdmDatabaseSchema, - results_database_schema = resultsDatabaseSchema, - vocab_database_schema = vocabDatabaseSchema - ) - - queryPrevalenceByMonth <- SqlRender::loadRenderTranslateSql(sqlFilename = addCdmVersionPath("/visit/sqlPrevalenceByMonth.sql",cdmVersion), - packageName = "Achilles", - dbms = dbms, - cdm_database_schema = cdmDatabaseSchema, - results_database_schema = resultsDatabaseSchema, - vocab_database_schema = vocabDatabaseSchema - ) - - queryVisitDurationByType <- SqlRender::loadRenderTranslateSql(sqlFilename = addCdmVersionPath("/visit/sqlVisitDurationByType.sql",cdmVersion), - packageName = "Achilles", - dbms = dbms, - cdm_database_schema = cdmDatabaseSchema, - results_database_schema = resultsDatabaseSchema, - vocab_database_schema = vocabDatabaseSchema - ) - - queryAgeAtFirstOccurrence <- SqlRender::loadRenderTranslateSql(sqlFilename = addCdmVersionPath("/visit/sqlAgeAtFirstOccurrence.sql",cdmVersion), - packageName = "Achilles", - dbms = dbms, - cdm_database_schema = cdmDatabaseSchema, - results_database_schema = resultsDatabaseSchema, - vocab_database_schema = vocabDatabaseSchema - ) - - dataPrevalenceByGenderAgeYear <- DatabaseConnector::querySql(conn,queryPrevalenceByGenderAgeYear) - dataPrevalenceByMonth <- DatabaseConnector::querySql(conn,queryPrevalenceByMonth) - dataVisitDurationByType <- DatabaseConnector::querySql(conn,queryVisitDurationByType) - dataAgeAtFirstOccurrence <- DatabaseConnector::querySql(conn,queryAgeAtFirstOccurrence) - - buildVisitReport <- function(concept_id) { - report <- {} - report$PREVALENCE_BY_GENDER_AGE_YEAR <- dataPrevalenceByGenderAgeYear[dataPrevalenceByGenderAgeYear$CONCEPT_ID == concept_id,c(3,4,5,6)] - report$PREVALENCE_BY_MONTH <- dataPrevalenceByMonth[dataPrevalenceByMonth$CONCEPT_ID == concept_id,c(3,4)] - report$VISIT_DURATION_BY_TYPE <- dataVisitDurationByType[dataVisitDurationByType$CONCEPT_ID == concept_id,c(2,3,4,5,6,7,8,9)] - report$AGE_AT_FIRST_OCCURRENCE <- dataAgeAtFirstOccurrence[dataAgeAtFirstOccurrence$CONCEPT_ID == concept_id,c(2,3,4,5,6,7,8,9)] - filename <- paste(outputPath, "/visits/visit_" , concept_id , ".json", sep='') - - write(rjson::toJSON(report,method="C"),filename) - - #Update progressbar: - env <- parent.env(environment()) - curVal <- get("progress", envir = env) - assign("progress", curVal +1 ,envir= env) - utils::setTxtProgressBar(get("progressBar", envir= env), (curVal + 1) / get("totalCount", envir= env)) - } - - dummy <- lapply(uniqueConcepts, buildVisitReport) - - utils::setTxtProgressBar(progressBar, 1) - close(progressBar) -} - -generateDeathReports <- function(conn, dbms, cdmDatabaseSchema, resultsDatabaseSchema, outputPath, cdmVersion = "4", vocabDatabaseSchema = cdmDatabaseSchema){ - writeLines("Generating death reports") - progressBar <- utils::txtProgressBar(max=4,style=3) - progress = 0 - output = 
{} - - # 1. Title: Prevalence drilldown, prevalence by gender, age, and year - # a. Visualization: trellis lineplot - # b. Trellis category: age decile - # c. X-axis: year - # d. y-axis: condition prevalence (% persons) - # e. series: male, female - - renderedSql <- SqlRender::loadRenderTranslateSql(sqlFilename = addCdmVersionPath("/death/sqlPrevalenceByGenderAgeYear.sql",cdmVersion), - packageName = "Achilles", - dbms = dbms, - cdm_database_schema = cdmDatabaseSchema, - results_database_schema = resultsDatabaseSchema, - vocab_database_schema = vocabDatabaseSchema - ) - - prevalenceByGenderAgeYearData <- DatabaseConnector::querySql(conn,renderedSql) - progress = progress + 1 - utils::setTxtProgressBar(progressBar, progress) - output$PREVALENCE_BY_GENDER_AGE_YEAR = prevalenceByGenderAgeYearData - - # 2. Title: Prevalence by month - # a. Visualization: scatterplot - # b. X-axis: month/year - # c. y-axis: % of persons - # d. Comment: plot to show seasonality - - renderedSql <- SqlRender::loadRenderTranslateSql(sqlFilename = addCdmVersionPath("/death/sqlPrevalenceByMonth.sql",cdmVersion), - packageName = "Achilles", - dbms = dbms, - cdm_database_schema = cdmDatabaseSchema, - results_database_schema = resultsDatabaseSchema, - vocab_database_schema = vocabDatabaseSchema - ) - - prevalenceByMonthData <- DatabaseConnector::querySql(conn,renderedSql) - progress = progress + 1 - utils::setTxtProgressBar(progressBar, progress) - output$PREVALENCE_BY_MONTH = prevalenceByMonthData - - # 3. Title: Death records by type - # a. Visualization: pie - # b. Category: death type - # c. value: % of records - - renderedSql <- SqlRender::loadRenderTranslateSql(sqlFilename = addCdmVersionPath("/death/sqlDeathByType.sql",cdmVersion), - packageName = "Achilles", - dbms = dbms, - cdm_database_schema = cdmDatabaseSchema, - results_database_schema = resultsDatabaseSchema, - vocab_database_schema = vocabDatabaseSchema - ) - - deathByTypeData <- DatabaseConnector::querySql(conn,renderedSql) - progress = progress + 1 - utils::setTxtProgressBar(progressBar, progress) - output$DEATH_BY_TYPE = deathByTypeData - - # 4. Title: Age at death - # a. Visualization: side-by-side boxplot - # b. Category: gender - # c. Values: Min/25%/Median/95%/Max as age at death - - renderedSql <- SqlRender::loadRenderTranslateSql(sqlFilename = addCdmVersionPath("/death/sqlAgeAtDeath.sql",cdmVersion), - packageName = "Achilles", - dbms = dbms, - cdm_database_schema = cdmDatabaseSchema, - results_database_schema = resultsDatabaseSchema, - vocab_database_schema = vocabDatabaseSchema - ) - - ageAtDeathData <- DatabaseConnector::querySql(conn,renderedSql) - progress = progress + 1 - utils::setTxtProgressBar(progressBar, progress) - output$AGE_AT_DEATH = ageAtDeathData - - # Convert to JSON and save file result - jsonOutput = rjson::toJSON(output) - write(jsonOutput, file=paste(outputPath, "/death.json", sep="")) - close(progressBar) -} +# @file exportToJson +# +# Copyright 2018 Observational Health Data Sciences and Informatics +# +# This file is part of Achilles +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# +# @author Observational Health Data Sciences and Informatics +# @author Chris Knoll +# @author Frank DeFalco +# @author Ajit Londhe + + +# When adding a new report, append it to inst/csv/export/all_reports.csv + +getAllReports <- function() { + allReports <- read.csv(file = system.file("csv", "export", "all_reports.csv", package = "Achilles"), + stringsAsFactors = FALSE, header = TRUE)$REPORT + return (allReports) +} + +initOutputPath <- function (outputPath){ + # create output path if it doesn't already exist, warn if it does + if (file.exists(outputPath)){ + writeLines(paste("Warning: folder",outputPath,"already exists")) + } else { + dir.create(paste(outputPath,"/",sep="")) + } +} + +#' @title showReportTypes +#' +#' @description +#' \code{showReportTypes} Displays the Report Types that can be passed as vector values to exportToJson. +#' +#' @details +#' exportToJson supports the following report types: +#' "CONDITION","CONDITION_ERA", "DASHBOARD", "DATA_DENSITY", "DEATH", "DRUG", "DRUG_ERA", "HEEL", "META", "OBSERVATION", "OBSERVATION_PERIOD", "PERSON", "PROCEDURE","VISIT" +#' +#' @return none (opens the allReports vector in a View() display) +#' @examples \dontrun{ +#' showReportTypes() +#' } +#' @export +showReportTypes <- function() { + utils::View(getAllReports()) +} + +#' @title exportToJson +#' +#' @description +#' \code{exportToJson} Exports Achilles statistics into a JSON form for reports. +#' +#' @details +#' Creates individual files for each report found in Achilles.Web +#' +#' +#' @param connectionDetails An R object of type ConnectionDetail (details for the function that contains server info, database type, optionally username/password, port) +#' @param cdmDatabaseSchema Name of the database schema that contains the OMOP CDM. +#' @param resultsDatabaseSchema Name of the database schema that contains the Achilles analysis files. Default is cdmDatabaseSchema +#' @param outputPath A folder location to save the JSON files. Default is current working folder +#' @param reports A character vector listing the set of reports to generate. Default is all reports. +#' @param vocabDatabaseSchema string name of database schema that contains OMOP Vocabulary. Default is cdmDatabaseSchema. On SQL Server, this should specifiy both the database and the schema, so for example 'results.dbo'. +#' @param compressIntoOneFile Boolean indicating if the JSON files should be compressed into one zip file. +#' Please note that in Windows, the zip application must be stored in the system environment, +#' e.g. Sys.setenv("R_ZIPCMD", "some_path_to_zip"). +#' Due to recursion, the actual Achilles files and folders will be embedded in any parent directories that the source folder has. 
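# A hedged usage sketch for the exportToJson interface documented above (server, schema,
# and path values below are placeholders, not taken from this patch). On Windows,
# R_ZIPCMD must point at a zip executable before compressIntoOneFile = TRUE can build
# the archive; the dashboard report is generated last, after the reports it reads from.
connectionDetails <- DatabaseConnector::createConnectionDetails(dbms = "sql server",
                                                                server = "yourserver")
Sys.setenv(R_ZIPCMD = "C:/path/to/zip.exe")  # only needed for compressIntoOneFile on Windows
exportToJson(connectionDetails,
             cdmDatabaseSchema = "cdm",
             resultsDatabaseSchema = "results",
             outputPath = "output/export",
             reports = c("PERSON", "OBSERVATION_PERIOD", "DASHBOARD"),
             compressIntoOneFile = TRUE)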
+#' +#' See \code{data(allReports)} for a list of all report types +#' +#' @return none +#' @examples \dontrun{ +#' connectionDetails <- DatabaseConnector::createConnectionDetails(dbms="sql server", server="yourserver") +#' exportToJson(connectionDetails, cdmDatabaseSchema="cdm4_sim", outputPath="your/output/path") +#' } +#' @export +exportToJson <- function (connectionDetails, + cdmDatabaseSchema, + resultsDatabaseSchema, + outputPath = getwd(), + reports = getAllReports(), + vocabDatabaseSchema = cdmDatabaseSchema, + compressIntoOneFile = FALSE) { + + start <- Sys.time() + if (missing(resultsDatabaseSchema)) + resultsDatabaseSchema <- cdmDatabaseSchema + + initOutputPath(outputPath) + + # connect to the results schema + connectionDetails$schema = resultsDatabaseSchema + conn <- DatabaseConnector::connect(connectionDetails) + + # generate reports + + if ("CONDITION" %in% reports) + { + generateConditionTreemap(conn, connectionDetails$dbms, cdmDatabaseSchema, resultsDatabaseSchema, outputPath, vocabDatabaseSchema) + generateConditionReports(conn, connectionDetails$dbms, cdmDatabaseSchema, resultsDatabaseSchema, outputPath, vocabDatabaseSchema) + } + + if ("CONDITION_ERA" %in% reports) + { + generateConditionEraTreemap(conn, connectionDetails$dbms, cdmDatabaseSchema, resultsDatabaseSchema, outputPath, vocabDatabaseSchema) + generateConditionEraReports(conn, connectionDetails$dbms, cdmDatabaseSchema, resultsDatabaseSchema, outputPath, vocabDatabaseSchema) + } + + if ("DATA_DENSITY" %in% reports) + generateDataDensityReport(conn, connectionDetails$dbms, cdmDatabaseSchema, resultsDatabaseSchema, outputPath, vocabDatabaseSchema) + + if ("DEATH" %in% reports) + { + generateDeathReports(conn, connectionDetails$dbms, cdmDatabaseSchema, resultsDatabaseSchema, outputPath, vocabDatabaseSchema) + } + + if ("DRUG_ERA" %in% reports) + { + generateDrugEraTreemap(conn,connectionDetails$dbms, cdmDatabaseSchema, resultsDatabaseSchema, outputPath, vocabDatabaseSchema) + generateDrugEraReports(conn,connectionDetails$dbms, cdmDatabaseSchema, resultsDatabaseSchema, outputPath, vocabDatabaseSchema) + } + + if ("DRUG" %in% reports) + { + generateDrugTreemap(conn, connectionDetails$dbms, cdmDatabaseSchema, resultsDatabaseSchema, outputPath, vocabDatabaseSchema) + generateDrugReports(conn, connectionDetails$dbms, cdmDatabaseSchema, resultsDatabaseSchema, outputPath, vocabDatabaseSchema) + } + + if ("HEEL" %in% reports) + { + generateAchillesHeelReport(conn, connectionDetails$dbms, cdmDatabaseSchema, resultsDatabaseSchema, outputPath, vocabDatabaseSchema) + } + + if (("META" %in% reports)) + { + generateDomainMetaReport(conn, connectionDetails$dbms, cdmDatabaseSchema, resultsDatabaseSchema, outputPath, vocabDatabaseSchema) + } + + if ( ("MEASUREMENT" %in% reports)) + { + generateMeasurementTreemap(conn, connectionDetails$dbms, cdmDatabaseSchema, resultsDatabaseSchema, outputPath, vocabDatabaseSchema) + generateMeasurementReports(conn, connectionDetails$dbms, cdmDatabaseSchema, resultsDatabaseSchema, outputPath, vocabDatabaseSchema) + } + + + if ("OBSERVATION" %in% reports) + { + generateObservationTreemap(conn, connectionDetails$dbms, cdmDatabaseSchema, resultsDatabaseSchema, outputPath, vocabDatabaseSchema) + generateObservationReports(conn, connectionDetails$dbms, cdmDatabaseSchema, resultsDatabaseSchema, outputPath, vocabDatabaseSchema) + } + + if ("OBSERVATION_PERIOD" %in% reports) + generateObservationPeriodReport(conn, connectionDetails$dbms, cdmDatabaseSchema, resultsDatabaseSchema, outputPath, 
vocabDatabaseSchema) + + if ("PERSON" %in% reports) + generatePersonReport(conn, connectionDetails$dbms, cdmDatabaseSchema, resultsDatabaseSchema, outputPath, vocabDatabaseSchema) + + if ("PROCEDURE" %in% reports) + { + generateProcedureTreemap(conn, connectionDetails$dbms, cdmDatabaseSchema, resultsDatabaseSchema, outputPath, vocabDatabaseSchema) + generateProcedureReports(conn, connectionDetails$dbms, cdmDatabaseSchema, resultsDatabaseSchema, outputPath, vocabDatabaseSchema) + } + + if ("VISIT" %in% reports) + { + generateVisitTreemap(conn, connectionDetails$dbms, cdmDatabaseSchema, resultsDatabaseSchema, outputPath, vocabDatabaseSchema) + generateVisitReports(conn, connectionDetails$dbms, cdmDatabaseSchema, resultsDatabaseSchema, outputPath, vocabDatabaseSchema) + } + + # dashboard is always last + if ("DASHBOARD" %in% reports) + { + generateDashboardReport(outputPath) + } + + DatabaseConnector::disconnect(conn) + + if (compressIntoOneFile) { + zip(zipfile = file.path(outputPath, sprintf("%s.zip", cdmDatabaseSchema)), + files = c(outputPath), flags = c("-r")) + } + + delta <- Sys.time() - start + writeLines(paste("Export took", signif(delta,3), attr(delta,"units"))) + writeLines(paste("JSON files can now be found in",outputPath)) +} + +#' @title exportConditionToJson +#' +#' @description +#' \code{exportConditonToJson} Exports Achilles Condition report into a JSON form for reports. +#' +#' @details +#' Creates individual files for Condition report found in Achilles.Web +#' +#' +#' @param connectionDetails An R object of type ConnectionDetail (details for the function that contains server info, database type, optionally username/password, port) +#' @param cdmDatabaseSchema Name of the database schema that contains the vocabulary files +#' @param resultsDatabaseSchema Name of the database schema that contains the Achilles analysis files. Default is cdmDatabaseSchema +#' @param outputPath A folder location to save the JSON files. Default is current working folder +#' @param vocabDatabaseSchema string name of database schema that contains OMOP Vocabulary. Default is cdmDatabaseSchema. On SQL Server, this should specifiy both the database and the schema, so for example 'results.dbo'. +#' +#' @return none +#' @examples \dontrun{ +#' connectionDetails <- DatabaseConnector::createConnectionDetails(dbms="sql server", server="yourserver") +#' exportConditionToJson(connectionDetails, cdmDatabaseSchema="cdm4_sim", outputPath="your/output/path") +#' } +#' @export +exportConditionToJson <- function (connectionDetails, cdmDatabaseSchema, resultsDatabaseSchema, outputPath = getwd(), vocabDatabaseSchema = cdmDatabaseSchema) +{ + exportToJson(connectionDetails, cdmDatabaseSchema, resultsDatabaseSchema, outputPath, reports = c("CONDITION"), vocabDatabaseSchema) +} + +#' @title exportConditionEraToJson +#' +#' @description +#' \code{exportConditionEraToJson} Exports Achilles Condition Era report into a JSON form for reports. +#' +#' @details +#' Creates individual files for Condition Era report found in Achilles.Web +#' +#' +#' @param connectionDetails An R object of type ConnectionDetail (details for the function that contains server info, database type, optionally username/password, port) +#' @param cdmDatabaseSchema Name of the database schema that contains the vocabulary files +#' @param resultsDatabaseSchema Name of the database schema that contains the Achilles analysis files. Default is cdmDatabaseSchema +#' @param outputPath A folder location to save the JSON files. 
Default is current working folder +#' @param vocabDatabaseSchema string name of database schema that contains OMOP Vocabulary. Default is cdmDatabaseSchema. On SQL Server, this should specifiy both the database and the schema, so for example 'results.dbo'. +#' +#' @return none +#' @examples \dontrun{ +#' connectionDetails <- DatabaseConnector::createConnectionDetails(dbms="sql server", server="yourserver") +#' exportConditionEraToJson(connectionDetails, cdmDatabaseSchema="cdm4_sim", outputPath="your/output/path") +#' } +#' @export +exportConditionEraToJson <- function (connectionDetails, cdmDatabaseSchema, resultsDatabaseSchema, outputPath = getwd(), vocabDatabaseSchema = cdmDatabaseSchema) +{ + exportToJson(connectionDetails, cdmDatabaseSchema, resultsDatabaseSchema, outputPath, reports = c("CONDITION_ERA"), vocabDatabaseSchema) +} + +#' @title exportDashboardToJson +#' +#' @description +#' \code{exportDashboardToJson} Exports Achilles Dashboard report into a JSON form for reports. +#' +#' @details +#' Creates individual files for Dashboard report found in Achilles.Web. NOTE: This function reads the results +#' from the other exports and aggregates them into a single file. If other reports are not genreated, this function will fail. +#' +#' +#' @param connectionDetails An R object of type ConnectionDetail (details for the function that contains server info, database type, optionally username/password, port) +#' @param cdmDatabaseSchema Name of the database schema that contains the vocabulary files +#' @param resultsDatabaseSchema Name of the database schema that contains the Achilles analysis files. Default is cdmDatabaseSchema +#' @param outputPath A folder location to save the JSON files. Default is current working folder +#' +#' @param vocabDatabaseSchema string name of database schema that contains OMOP Vocabulary. Default is cdmDatabaseSchema. On SQL Server, this should specifiy both the database and the schema, so for example 'results.dbo'. +#' +#' @return none +#' @examples \dontrun{ +#' connectionDetails <- DatabaseConnector::createConnectionDetails(dbms="sql server", server="yourserver") +#' exportDashboardToJson(connectionDetails, cdmDatabaseSchema="cdm4_sim", outputPath="your/output/path") +#' } +#' @export +exportDashboardToJson <- function (connectionDetails, cdmDatabaseSchema, resultsDatabaseSchema, outputPath = getwd(), vocabDatabaseSchema = cdmDatabaseSchema) +{ + exportToJson(connectionDetails, cdmDatabaseSchema, resultsDatabaseSchema, outputPath, reports = c("DASHBOARD"), vocabDatabaseSchema) +} + +#' @title exportDataDensityToJson +#' +#' @description +#' \code{exportDataDensityToJson} Exports Achilles Data Density report into a JSON form for reports. +#' +#' @details +#' Creates individual files for Data Density report found in Achilles.Web +#' +#' +#' @param connectionDetails An R object of type ConnectionDetail (details for the function that contains server info, database type, optionally username/password, port) +#' @param cdmDatabaseSchema Name of the database schema that contains the vocabulary files +#' @param resultsDatabaseSchema Name of the database schema that contains the Achilles analysis files. Default is cdmDatabaseSchema +#' @param outputPath A folder location to save the JSON files. Default is current working folder +#' +#' @param vocabDatabaseSchema string name of database schema that contains OMOP Vocabulary. Default is cdmDatabaseSchema. On SQL Server, this should specifiy both the database and the schema, so for example 'results.dbo'. 
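# A hedged sketch of the ordering caveat described above (connectionDetails as created
# earlier; schema and path values are placeholders). The dashboard export only
# aggregates JSON files that already exist in outputPath, so the reports it reads from
# are generated into the same folder first.
exportPersonToJson(connectionDetails, cdmDatabaseSchema = "cdm",
                   resultsDatabaseSchema = "results", outputPath = "output/export")
exportObservationPeriodToJson(connectionDetails, cdmDatabaseSchema = "cdm",
                              resultsDatabaseSchema = "results",
                              outputPath = "output/export")
exportDashboardToJson(connectionDetails, cdmDatabaseSchema = "cdm",
                      resultsDatabaseSchema = "results", outputPath = "output/export")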
+#' +#' @return none +#' @examples \dontrun{ +#' connectionDetails <- DatabaseConnector::createConnectionDetails(dbms="sql server", server="yourserver") +#' exportDataDensityToJson(connectionDetails, cdmDatabaseSchema="cdm4_sim", outputPath="your/output/path") +#' } +#' @export +exportDataDensityToJson <- function (connectionDetails, cdmDatabaseSchema, resultsDatabaseSchema, outputPath = getwd(), vocabDatabaseSchema = cdmDatabaseSchema) +{ + exportToJson(connectionDetails, cdmDatabaseSchema, resultsDatabaseSchema, outputPath, reports = c("DATA_DENSITY"), vocabDatabaseSchema) +} + +#' @title exportDeathToJson +#' +#' @description +#' \code{exportDeathToJson} Exports Achilles Death report into a JSON form for reports. +#' +#' @details +#' Creates individual files for Death report found in Achilles.Web +#' +#' +#' @param connectionDetails An R object of type ConnectionDetail (details for the function that contains server info, database type, optionally username/password, port) +#' @param cdmDatabaseSchema Name of the database schema that contains the vocabulary files +#' @param resultsDatabaseSchema Name of the database schema that contains the Achilles analysis files. Default is cdmDatabaseSchema +#' @param outputPath A folder location to save the JSON files. Default is current working folder +#' +#' @param vocabDatabaseSchema string name of database schema that contains OMOP Vocabulary. Default is cdmDatabaseSchema. On SQL Server, this should specifiy both the database and the schema, so for example 'results.dbo'. +#' +#' @return none +#' @examples \dontrun{ +#' connectionDetails <- DatabaseConnector::createConnectionDetails(dbms="sql server", server="yourserver") +#' exportDeathToJson(connectionDetails, cdmDatabaseSchema="cdm4_sim", outputPath="your/output/path") +#' } +#' @export +exportDeathToJson <- function (connectionDetails, cdmDatabaseSchema, resultsDatabaseSchema, outputPath = getwd(), vocabDatabaseSchema = cdmDatabaseSchema) +{ + exportToJson(connectionDetails, cdmDatabaseSchema, resultsDatabaseSchema, outputPath, reports = c("DEATH"), vocabDatabaseSchema) +} + +#' @title exportDrugToJson +#' +#' @description +#' \code{exportDrugToJson} Exports Achilles Drug report into a JSON form for reports. +#' +#' @details +#' Creates individual files for Drug report found in Achilles.Web +#' +#' +#' @param connectionDetails An R object of type ConnectionDetail (details for the function that contains server info, database type, optionally username/password, port) +#' @param cdmDatabaseSchema Name of the database schema that contains the vocabulary files +#' @param resultsDatabaseSchema Name of the database schema that contains the Achilles analysis files. Default is cdmDatabaseSchema +#' @param outputPath A folder location to save the JSON files. Default is current working folder +#' +#' @param vocabDatabaseSchema string name of database schema that contains OMOP Vocabulary. Default is cdmDatabaseSchema. On SQL Server, this should specifiy both the database and the schema, so for example 'results.dbo'. 
+#' +#' @return none +#' @examples \dontrun{ +#' connectionDetails <- DatabaseConnector::createConnectionDetails(dbms="sql server", server="yourserver") +#' exportDrugToJson(connectionDetails, cdmDatabaseSchema="cdm4_sim", outputPath="your/output/path") +#' } +#' @export +exportDrugToJson <- function (connectionDetails, cdmDatabaseSchema, resultsDatabaseSchema, outputPath = getwd(), vocabDatabaseSchema = cdmDatabaseSchema) +{ + exportToJson(connectionDetails, cdmDatabaseSchema, resultsDatabaseSchema, outputPath, reports = c("DRUG"), vocabDatabaseSchema) +} + +#' @title exportDrugEraToJson +#' +#' @description +#' \code{exportDrugEraToJson} Exports Achilles Drug Era report into a JSON form for reports. +#' +#' @details +#' Creates individual files for Drug Era report found in Achilles.Web +#' +#' +#' @param connectionDetails An R object of type ConnectionDetail (details for the function that contains server info, database type, optionally username/password, port) +#' @param cdmDatabaseSchema Name of the database schema that contains the vocabulary files +#' @param resultsDatabaseSchema Name of the database schema that contains the Achilles analysis files. Default is cdmDatabaseSchema +#' @param outputPath A folder location to save the JSON files. Default is current working folder +#' +#' @param vocabDatabaseSchema string name of database schema that contains OMOP Vocabulary. Default is cdmDatabaseSchema. On SQL Server, this should specifiy both the database and the schema, so for example 'results.dbo'. +#' +#' @return none +#' @examples \dontrun{ +#' connectionDetails <- DatabaseConnector::createConnectionDetails(dbms="sql server", server="yourserver") +#' exportDrugEraToJson(connectionDetails, cdmDatabaseSchema="cdm4_sim", outputPath="your/output/path") +#' } +#' @export +exportDrugEraToJson <- function (connectionDetails, cdmDatabaseSchema, resultsDatabaseSchema, outputPath = getwd(), vocabDatabaseSchema = cdmDatabaseSchema) +{ + exportToJson(connectionDetails, cdmDatabaseSchema, resultsDatabaseSchema, outputPath, reports = c("DRUG_ERA"), vocabDatabaseSchema) +} + +#' @title exportHeelToJson +#' +#' @description +#' \code{exportHeelToJson} Exports Achilles Heel report into a JSON form for reports. +#' +#' @details +#' Creates individual files for Achilles Heel report found in Achilles.Web +#' +#' +#' @param connectionDetails An R object of type ConnectionDetail (details for the function that contains server info, database type, optionally username/password, port) +#' @param cdmDatabaseSchema Name of the database schema that contains the vocabulary files +#' @param resultsDatabaseSchema Name of the database schema that contains the Achilles analysis files. Default is cdmDatabaseSchema +#' @param outputPath A folder location to save the JSON files. Default is current working folder +#' +#' @param vocabDatabaseSchema string name of database schema that contains OMOP Vocabulary. Default is cdmDatabaseSchema. On SQL Server, this should specifiy both the database and the schema, so for example 'results.dbo'. 
+#' +#' @return none +#' @examples \dontrun{ +#' connectionDetails <- DatabaseConnector::createConnectionDetails(dbms="sql server", server="yourserver") +#' exportHeelToJson(connectionDetails, cdmDatabaseSchema="cdm4_sim", outputPath="your/output/path") +#' } +#' @export +exportHeelToJson <- function (connectionDetails, cdmDatabaseSchema, resultsDatabaseSchema, outputPath = getwd(), vocabDatabaseSchema = cdmDatabaseSchema) +{ + exportToJson(connectionDetails, cdmDatabaseSchema, resultsDatabaseSchema, outputPath, reports = c("HEEL"), vocabDatabaseSchema) +} + +#' @title exportMetaToJson +#' +#' @description +#' \code{exportMetaToJson} Exports Achilles Heel report into a JSON form for reports. +#' +#' @details +#' Creates individual files for Achilles Heel report found in Achilles.Web +#' +#' +#' @param connectionDetails An R object of type ConnectionDetail (details for the function that contains server info, database type, optionally username/password, port) +#' @param cdmDatabaseSchema Name of the database schema that contains the vocabulary files +#' @param resultsDatabaseSchema Name of the database schema that contains the Achilles analysis files. Default is cdmDatabaseSchema +#' @param outputPath A folder location to save the JSON files. Default is current working folder +#' +#' @param vocabDatabaseSchema string name of database schema that contains OMOP Vocabulary. Default is cdmDatabaseSchema. On SQL Server, this should specifiy both the database and the schema, so for example 'results.dbo'. +#' +#' @return none +#' @examples \dontrun{ +#' connectionDetails <- DatabaseConnector::createConnectionDetails(dbms="sql server", server="yourserver") +#' exportMetaToJson(connectionDetails, cdmDatabaseSchema="cdm4_sim", outputPath="your/output/path") +#' } +#' @export +exportMetaToJson <- function (connectionDetails, cdmDatabaseSchema, resultsDatabaseSchema, outputPath = getwd(), vocabDatabaseSchema = cdmDatabaseSchema) +{ + exportToJson(connectionDetails, cdmDatabaseSchema, resultsDatabaseSchema, outputPath, reports = c("META"), vocabDatabaseSchema) +} + +#' @title exportMeasurementToJson +#' +#' @description +#' \code{exportMeasurementToJson} Exports Measurement report into a JSON form for reports. +#' +#' @details +#' Creates individual files for Measurement report found in Achilles.Web +#' +#' +#' @param connectionDetails An R object of type ConnectionDetail (details for the function that contains server info, database type, optionally username/password, port) +#' @param cdmDatabaseSchema Name of the database schema that contains the vocabulary files +#' @param resultsDatabaseSchema Name of the database schema that contains the Achilles analysis files. Default is cdmDatabaseSchema +#' @param outputPath A folder location to save the JSON files. Default is current working folder +#' +#' @param vocabDatabaseSchema string name of database schema that contains OMOP Vocabulary. Default is cdmDatabaseSchema. On SQL Server, this should specifiy both the database and the schema, so for example 'results.dbo'. 
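# A hedged sketch for the META export (connection objects and schema names are
# placeholders; the table check mirrors generateDomainMetaReport further below). The
# META report only yields content when a CDM_DOMAIN_META table exists in the CDM
# schema, so it can be worth probing for the table before requesting the report.
conn <- DatabaseConnector::connect(connectionDetails)
hasDomainMeta <- "CDM_DOMAIN_META" %in%
  DatabaseConnector::getTableNames(connection = conn, databaseSchema = "cdm")
DatabaseConnector::disconnect(conn)
if (hasDomainMeta) {
  exportMetaToJson(connectionDetails, cdmDatabaseSchema = "cdm",
                   resultsDatabaseSchema = "results", outputPath = "output/export")
}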
+#' +#' @return none +#' @examples \dontrun{ +#' connectionDetails <- DatabaseConnector::createConnectionDetails(dbms="sql server", server="yourserver") +#' exportMeasurementToJson(connectionDetails, cdmDatabaseSchema="cdm4_sim", outputPath="your/output/path") +#' } +#' @export +exportMeasurementToJson <- function (connectionDetails, cdmDatabaseSchema, resultsDatabaseSchema, outputPath = getwd(), vocabDatabaseSchema = cdmDatabaseSchema) +{ + exportToJson(connectionDetails, cdmDatabaseSchema, resultsDatabaseSchema, outputPath, reports = c("MEASUREMENT"), vocabDatabaseSchema) +} + +#' @title exportObservationToJson +#' +#' @description +#' \code{exportObservationToJson} Exports Achilles Observation report into a JSON form for reports. +#' +#' @details +#' Creates individual files for Observation report found in Achilles.Web +#' +#' +#' @param connectionDetails An R object of type ConnectionDetail (details for the function that contains server info, database type, optionally username/password, port) +#' @param cdmDatabaseSchema Name of the database schema that contains the vocabulary files +#' @param resultsDatabaseSchema Name of the database schema that contains the Achilles analysis files. Default is cdmDatabaseSchema +#' @param outputPath A folder location to save the JSON files. Default is current working folder +#' +#' @param vocabDatabaseSchema string name of database schema that contains OMOP Vocabulary. Default is cdmDatabaseSchema. On SQL Server, this should specifiy both the database and the schema, so for example 'results.dbo'. +#' +#' @return none +#' @examples \dontrun{ +#' connectionDetails <- DatabaseConnector::createConnectionDetails(dbms="sql server", server="yourserver") +#' exportObservationToJson(connectionDetails, cdmDatabaseSchema="cdm4_sim", outputPath="your/output/path") +#' } +#' @export +exportObservationToJson <- function (connectionDetails, cdmDatabaseSchema, resultsDatabaseSchema, outputPath = getwd(), vocabDatabaseSchema = cdmDatabaseSchema) +{ + exportToJson(connectionDetails, cdmDatabaseSchema, resultsDatabaseSchema, outputPath, reports = c("OBSERVATION"), vocabDatabaseSchema) +} + +#' @title exportObservationPeriodToJson +#' +#' @description +#' \code{exportObservationPeriodToJson} Exports Achilles Observation Period report into a JSON form for reports. +#' +#' @details +#' Creates individual files for Observation Period report found in Achilles.Web +#' +#' +#' @param connectionDetails An R object of type ConnectionDetail (details for the function that contains server info, database type, optionally username/password, port) +#' @param cdmDatabaseSchema Name of the database schema that contains the vocabulary files +#' @param resultsDatabaseSchema Name of the database schema that contains the Achilles analysis files. Default is cdmDatabaseSchema +#' @param outputPath A folder location to save the JSON files. Default is current working folder +#' +#' @param vocabDatabaseSchema string name of database schema that contains OMOP Vocabulary. Default is cdmDatabaseSchema. On SQL Server, this should specifiy both the database and the schema, so for example 'results.dbo'. 
+#' +#' @return none +#' @examples \dontrun{ +#' connectionDetails <- DatabaseConnector::createConnectionDetails(dbms="sql server", server="yourserver") +#' exportObservationPeriodToJson(connectionDetails, cdmDatabaseSchema="cdm4_sim", outputPath="your/output/path") +#' } +#' @export +exportObservationPeriodToJson <- function (connectionDetails, cdmDatabaseSchema, resultsDatabaseSchema, outputPath = getwd(), vocabDatabaseSchema = cdmDatabaseSchema) +{ + exportToJson(connectionDetails, cdmDatabaseSchema, resultsDatabaseSchema, outputPath, reports = c("OBSERVATION_PERIOD"), vocabDatabaseSchema) +} + +#' @title exportPersonToJson +#' +#' @description +#' \code{exportPersonToJson} Exports Achilles Person report into a JSON form for reports. +#' +#' @details +#' Creates individual files for Person report found in Achilles.Web +#' +#' +#' @param connectionDetails An R object of type ConnectionDetail (details for the function that contains server info, database type, optionally username/password, port) +#' @param cdmDatabaseSchema Name of the database schema that contains the vocabulary files +#' @param resultsDatabaseSchema Name of the database schema that contains the Achilles analysis files. Default is cdmDatabaseSchema +#' @param outputPath A folder location to save the JSON files. Default is current working folder +#' +#' @param vocabDatabaseSchema string name of database schema that contains OMOP Vocabulary. Default is cdmDatabaseSchema. On SQL Server, this should specifiy both the database and the schema, so for example 'results.dbo'. +#' +#' @return none +#' @examples \dontrun{ +#' connectionDetails <- DatabaseConnector::createConnectionDetails(dbms="sql server", server="yourserver") +#' exportPersonToJson(connectionDetails, cdmDatabaseSchema="cdm4_sim", outputPath="your/output/path") +#' } +#' @export +exportPersonToJson <- function (connectionDetails, cdmDatabaseSchema, resultsDatabaseSchema, outputPath = getwd(), vocabDatabaseSchema = cdmDatabaseSchema) +{ + exportToJson(connectionDetails, cdmDatabaseSchema, resultsDatabaseSchema, outputPath, reports = c("PERSON"), vocabDatabaseSchema) +} + +#' @title exportProcedureToJson +#' +#' @description +#' \code{exportProcedureToJson} Exports Achilles Procedure report into a JSON form for reports. +#' +#' @details +#' Creates individual files for Procedure report found in Achilles.Web +#' +#' +#' @param connectionDetails An R object of type ConnectionDetail (details for the function that contains server info, database type, optionally username/password, port) +#' @param cdmDatabaseSchema Name of the database schema that contains the vocabulary files +#' @param resultsDatabaseSchema Name of the database schema that contains the Achilles analysis files. Default is cdmDatabaseSchema +#' @param outputPath A folder location to save the JSON files. Default is current working folder +#' +#' @param vocabDatabaseSchema string name of database schema that contains OMOP Vocabulary. Default is cdmDatabaseSchema. On SQL Server, this should specifiy both the database and the schema, so for example 'results.dbo'. 
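# A brief illustration (schema and path values are placeholders): each export*ToJson
# wrapper defined in this file delegates to exportToJson with a single-element reports
# vector, so the two calls below produce the same output.
exportProcedureToJson(connectionDetails, cdmDatabaseSchema = "cdm",
                      resultsDatabaseSchema = "results", outputPath = "output/export")
exportToJson(connectionDetails, cdmDatabaseSchema = "cdm",
             resultsDatabaseSchema = "results", outputPath = "output/export",
             reports = c("PROCEDURE"))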
+#'
+#' @return none
+#' @examples \dontrun{
+#' connectionDetails <- DatabaseConnector::createConnectionDetails(dbms="sql server", server="yourserver")
+#' exportProcedureToJson(connectionDetails, cdmDatabaseSchema="cdm4_sim", outputPath="your/output/path")
+#' }
+#' @export
+exportProcedureToJson <- function (connectionDetails, cdmDatabaseSchema, resultsDatabaseSchema, outputPath = getwd(), vocabDatabaseSchema = cdmDatabaseSchema)
+{
+  exportToJson(connectionDetails, cdmDatabaseSchema, resultsDatabaseSchema, outputPath, reports = c("PROCEDURE"), vocabDatabaseSchema)
+}
+
+#' @title exportVisitToJson
+#'
+#' @description
+#' \code{exportVisitToJson} Exports Achilles Visit report into a JSON form for reports.
+#'
+#' @details
+#' Creates individual files for Visit report found in Achilles.Web
+#'
+#'
+#' @param connectionDetails An R object of type ConnectionDetail (details for the function that contains server info, database type, optionally username/password, port)
+#' @param cdmDatabaseSchema Name of the database schema that contains the vocabulary files
+#' @param resultsDatabaseSchema Name of the database schema that contains the Achilles analysis files. Default is cdmDatabaseSchema
+#' @param outputPath A folder location to save the JSON files. Default is current working folder
+#' @param vocabDatabaseSchema String name of the database schema that contains the OMOP Vocabulary. Default is cdmDatabaseSchema. On SQL Server, this should specify both the database and the schema, for example 'results.dbo'.
+#'
+#' @return none
+#' @examples \dontrun{
+#' connectionDetails <- DatabaseConnector::createConnectionDetails(dbms="sql server", server="yourserver")
+#' exportVisitToJson(connectionDetails, cdmDatabaseSchema="cdm4_sim", outputPath="your/output/path")
+#' }
+#' @export
+exportVisitToJson <- function (connectionDetails, cdmDatabaseSchema, resultsDatabaseSchema, outputPath = getwd(), vocabDatabaseSchema = cdmDatabaseSchema)
+{
+  exportToJson(connectionDetails, cdmDatabaseSchema, resultsDatabaseSchema, outputPath, reports = c("VISIT"), vocabDatabaseSchema)
+}
+
+generateAchillesHeelReport <- function(conn, dbms, cdmDatabaseSchema, resultsDatabaseSchema, outputPath, vocabDatabaseSchema = cdmDatabaseSchema) {
+  writeLines("Generating achilles heel report")
+  output <- {}
+
+  queryAchillesHeel <- SqlRender::loadRenderTranslateSql(sqlFilename = "export/achillesheel/sqlAchillesHeel.sql",
+                                                         packageName = "Achilles",
+                                                         dbms = dbms,
+                                                         warnOnMissingParameters = FALSE,
+                                                         cdm_database_schema = cdmDatabaseSchema,
+                                                         results_database_schema = resultsDatabaseSchema,
+                                                         vocab_database_schema = vocabDatabaseSchema
+                                                         )
+
+  output$MESSAGES <- DatabaseConnector::querySql(conn,queryAchillesHeel)
+  jsonOutput = rjson::toJSON(output)
+  write(jsonOutput, file=paste(outputPath, "/achillesheel.json", sep=""))
+}
+
+generateDomainMetaReport <- function(conn, dbms, cdmDatabaseSchema, resultsDatabaseSchema, outputPath, vocabDatabaseSchema = cdmDatabaseSchema) {
+  writeLines("Generating domain meta report")
+  output <- {}
+
+  queryDomainMeta <- SqlRender::loadRenderTranslateSql(sqlFilename = "export/domainmeta/sqlDomainMeta.sql",
+                                                       packageName = "Achilles",
+                                                       dbms = dbms,
+                                                       warnOnMissingParameters = FALSE,
+                                                       cdm_database_schema = cdmDatabaseSchema
+                                                       )
+
+  if ("CDM_DOMAIN_META" %in% DatabaseConnector::getTableNames(connection = conn, databaseSchema = cdmDatabaseSchema))
+  {
+    output$MESSAGES <- DatabaseConnector::querySql(conn, queryDomainMeta)
+    jsonOutput = rjson::toJSON(output)
+    write(jsonOutput, file=paste(outputPath,
"/domainmeta.json", sep="")) + } + else + { + writeLines("No CDM_DOMAIN_META table found, skipping export") + } +} + +generateDrugEraTreemap <- function(conn, dbms,cdmDatabaseSchema, resultsDatabaseSchema, outputPath, vocabDatabaseSchema = cdmDatabaseSchema) { + writeLines("Generating drug era treemap") + progressBar <- utils::txtProgressBar(max=1,style=3) + progress = 0 + +queryDrugEraTreemap <- SqlRender::loadRenderTranslateSql(sqlFilename = "export/drugera/sqlDrugEraTreemap.sql", + packageName = "Achilles", + dbms = dbms, + warnOnMissingParameters = FALSE, + cdm_database_schema = cdmDatabaseSchema, + results_database_schema = resultsDatabaseSchema, + vocab_database_schema = vocabDatabaseSchema + ) + + dataDrugEraTreemap <- DatabaseConnector::querySql(conn,queryDrugEraTreemap) + + write(rjson::toJSON(dataDrugEraTreemap,method="C"),paste(outputPath, "/drugera_treemap.json", sep='')) + progress = progress + 1 + utils::setTxtProgressBar(progressBar, progress) + + close(progressBar) +} + +generateDrugTreemap <- function(conn, dbms,cdmDatabaseSchema, resultsDatabaseSchema, outputPath, vocabDatabaseSchema = cdmDatabaseSchema) { + writeLines("Generating drug treemap") + progressBar <- utils::txtProgressBar(max=1,style=3) + progress = 0 + + queryDrugTreemap <- SqlRender::loadRenderTranslateSql(sqlFilename = "export/drug/sqlDrugTreemap.sql", + packageName = "Achilles", + dbms = dbms, + warnOnMissingParameters = FALSE, + cdm_database_schema = cdmDatabaseSchema, + results_database_schema = resultsDatabaseSchema, + vocab_database_schema = vocabDatabaseSchema + ) + + dataDrugTreemap <- DatabaseConnector::querySql(conn,queryDrugTreemap) + + write(rjson::toJSON(dataDrugTreemap,method="C"),paste(outputPath, "/drug_treemap.json", sep='')) + progress = progress + 1 + utils::setTxtProgressBar(progressBar, progress) + + close(progressBar) +} + +generateConditionTreemap <- function(conn, dbms, cdmDatabaseSchema, resultsDatabaseSchema, outputPath, vocabDatabaseSchema = cdmDatabaseSchema) { + writeLines("Generating condition treemap") + progressBar <- utils::txtProgressBar(max=1,style=3) + progress = 0 + + queryConditionTreemap <- SqlRender::loadRenderTranslateSql(sqlFilename = "export/condition/sqlConditionTreemap.sql", + packageName = "Achilles", + dbms = dbms, + warnOnMissingParameters = FALSE, + cdm_database_schema = cdmDatabaseSchema, + results_database_schema = resultsDatabaseSchema, + vocab_database_schema = vocabDatabaseSchema + ) + + dataConditionTreemap <- DatabaseConnector::querySql(conn,queryConditionTreemap) + + write(rjson::toJSON(dataConditionTreemap,method="C"),paste(outputPath, "/condition_treemap.json", sep='')) + progress = progress + 1 + utils::setTxtProgressBar(progressBar, progress) + + close(progressBar) +} + +generateConditionEraTreemap <- function(conn, dbms, cdmDatabaseSchema, resultsDatabaseSchema, outputPath, vocabDatabaseSchema = cdmDatabaseSchema) { + writeLines("Generating condition era treemap") + progressBar <- utils::txtProgressBar(max=1,style=3) + progress = 0 + + queryConditionEraTreemap <- SqlRender::loadRenderTranslateSql(sqlFilename = "export/conditionera/sqlConditionEraTreemap.sql", + packageName = "Achilles", + dbms = dbms, + warnOnMissingParameters = FALSE, + cdm_database_schema = cdmDatabaseSchema, + results_database_schema = resultsDatabaseSchema, + vocab_database_schema = vocabDatabaseSchema + ) + + dataConditionEraTreemap <- DatabaseConnector::querySql(conn,queryConditionEraTreemap) + + write(rjson::toJSON(dataConditionEraTreemap,method="C"),paste(outputPath, 
"/conditionera_treemap.json", sep='')) + progress = progress + 1 + utils::setTxtProgressBar(progressBar, progress) + + close(progressBar) +} + +generateConditionReports <- function(conn, dbms, cdmDatabaseSchema, resultsDatabaseSchema, outputPath, vocabDatabaseSchema = cdmDatabaseSchema) { + writeLines("Generating condition reports") + + treemapFile <- file.path(outputPath,"condition_treemap.json") + if (!file.exists(treemapFile)){ + writeLines(paste("Warning: treemap file",treemapFile,"does not exist. Skipping detail report generation.")) + return() + } + + treemapData <- rjson::fromJSON(file = treemapFile) + uniqueConcepts <- unique(treemapData$CONCEPT_ID) + totalCount <- length(uniqueConcepts) + + + conditionsFolder <- file.path(outputPath,"conditions") + if (file.exists(conditionsFolder)){ + writeLines(paste("Warning: folder ",conditionsFolder," already exists")) + } else { + dir.create(paste(conditionsFolder,"/",sep="")) + + } + + progressBar <- utils::txtProgressBar(style=3) + progress = 0 + + queryPrevalenceByGenderAgeYear <- SqlRender::loadRenderTranslateSql(sqlFilename = "export/condition/sqlPrevalenceByGenderAgeYear.sql", + packageName = "Achilles", + dbms = dbms, + warnOnMissingParameters = FALSE, + cdm_database_schema = cdmDatabaseSchema, + results_database_schema = resultsDatabaseSchema, + vocab_database_schema = vocabDatabaseSchema + ) + + queryPrevalenceByMonth <- SqlRender::loadRenderTranslateSql(sqlFilename = "export/condition/sqlPrevalenceByMonth.sql", + packageName = "Achilles", + dbms = dbms, + warnOnMissingParameters = FALSE, + cdm_database_schema = cdmDatabaseSchema, + results_database_schema = resultsDatabaseSchema, + vocab_database_schema = vocabDatabaseSchema + ) + + queryConditionsByType <- SqlRender::loadRenderTranslateSql(sqlFilename = "export/condition/sqlConditionsByType.sql", + packageName = "Achilles", + dbms = dbms, + warnOnMissingParameters = FALSE, + cdm_database_schema = cdmDatabaseSchema, + results_database_schema = resultsDatabaseSchema, + vocab_database_schema = vocabDatabaseSchema + ) + + queryAgeAtFirstDiagnosis <- SqlRender::loadRenderTranslateSql(sqlFilename = "export/condition/sqlAgeAtFirstDiagnosis.sql", + packageName = "Achilles", + dbms = dbms, + warnOnMissingParameters = FALSE, + cdm_database_schema = cdmDatabaseSchema, + results_database_schema = resultsDatabaseSchema, + vocab_database_schema = vocabDatabaseSchema + ) + + dataPrevalenceByGenderAgeYear <- DatabaseConnector::querySql(conn,queryPrevalenceByGenderAgeYear) + dataPrevalenceByMonth <- DatabaseConnector::querySql(conn,queryPrevalenceByMonth) + dataConditionsByType <- DatabaseConnector::querySql(conn,queryConditionsByType) + dataAgeAtFirstDiagnosis <- DatabaseConnector::querySql(conn,queryAgeAtFirstDiagnosis) + + + buildConditionReport <- function(concept_id) { + report <- {} + report$PREVALENCE_BY_GENDER_AGE_YEAR <- dataPrevalenceByGenderAgeYear[dataPrevalenceByGenderAgeYear$CONCEPT_ID == concept_id,c(3,4,5,6)] + report$PREVALENCE_BY_MONTH <- dataPrevalenceByMonth[dataPrevalenceByMonth$CONCEPT_ID == concept_id,c(3,4)] + report$CONDITIONS_BY_TYPE <- dataConditionsByType[dataConditionsByType$CONDITION_CONCEPT_ID == concept_id,c(4,5)] + report$AGE_AT_FIRST_DIAGNOSIS <- dataAgeAtFirstDiagnosis[dataAgeAtFirstDiagnosis$CONCEPT_ID == concept_id,c(2,3,4,5,6,7,8,9)] + filename <- paste(outputPath, "/conditions/condition_" , concept_id , ".json", sep='') + + write(rjson::toJSON(report,method="C"),filename) + + #Update progressbar: + env <- parent.env(environment()) + curVal <- get("progress", 
envir = env) + assign("progress", curVal +1 ,envir= env) + utils::setTxtProgressBar(get("progressBar", envir= env), (curVal + 1) / get("totalCount", envir= env)) + } + + dummy <- lapply(uniqueConcepts, buildConditionReport) + + utils::setTxtProgressBar(progressBar, 1) + close(progressBar) +} + +generateConditionEraReports <- function(conn, dbms, cdmDatabaseSchema, resultsDatabaseSchema, outputPath, vocabDatabaseSchema = cdmDatabaseSchema) { + writeLines("Generating condition era reports") + + treemapFile <- file.path(outputPath,"conditionera_treemap.json") + if (!file.exists(treemapFile)){ + writeLines(paste("Warning: treemap file",treemapFile,"does not exist. Skipping detail report generation.")) + return() + } + + treemapData <- rjson::fromJSON(file = treemapFile) + uniqueConcepts <- unique(treemapData$CONCEPT_ID) + totalCount <- length(uniqueConcepts) + + conditionsFolder <- file.path(outputPath,"conditioneras") + if (file.exists(conditionsFolder)){ + writeLines(paste("Warning: folder ",conditionsFolder," already exists")) + } else { + dir.create(paste(conditionsFolder,"/",sep="")) + + } + + progressBar <- utils::txtProgressBar(style=3) + progress = 0 + + queryPrevalenceByGenderAgeYear <- SqlRender::loadRenderTranslateSql(sqlFilename = "export/conditionera/sqlPrevalenceByGenderAgeYear.sql", + packageName = "Achilles", + dbms = dbms, + warnOnMissingParameters = FALSE, + cdm_database_schema = cdmDatabaseSchema, + results_database_schema = resultsDatabaseSchema, + vocab_database_schema = vocabDatabaseSchema + ) + + queryPrevalenceByMonth <- SqlRender::loadRenderTranslateSql(sqlFilename = "export/conditionera/sqlPrevalenceByMonth.sql", + packageName = "Achilles", + dbms = dbms, + warnOnMissingParameters = FALSE, + cdm_database_schema = cdmDatabaseSchema, + results_database_schema = resultsDatabaseSchema, + vocab_database_schema = vocabDatabaseSchema + ) + + queryAgeAtFirstDiagnosis <- SqlRender::loadRenderTranslateSql(sqlFilename = "export/conditionera/sqlAgeAtFirstDiagnosis.sql", + packageName = "Achilles", + dbms = dbms, + warnOnMissingParameters = FALSE, + cdm_database_schema = cdmDatabaseSchema, + results_database_schema = resultsDatabaseSchema, + vocab_database_schema = vocabDatabaseSchema + ) + + queryLengthOfEra <- SqlRender::loadRenderTranslateSql(sqlFilename = "export/conditionera/sqlLengthOfEra.sql", + packageName = "Achilles", + dbms = dbms, + warnOnMissingParameters = FALSE, + cdm_database_schema = cdmDatabaseSchema, + results_database_schema = resultsDatabaseSchema, + vocab_database_schema = vocabDatabaseSchema + ) + + dataPrevalenceByGenderAgeYear <- DatabaseConnector::querySql(conn,queryPrevalenceByGenderAgeYear) + dataPrevalenceByMonth <- DatabaseConnector::querySql(conn,queryPrevalenceByMonth) + dataLengthOfEra <- DatabaseConnector::querySql(conn,queryLengthOfEra) + dataAgeAtFirstDiagnosis <- DatabaseConnector::querySql(conn,queryAgeAtFirstDiagnosis) + + + buildConditionEraReport <- function(concept_id) { + report <- {} + report$PREVALENCE_BY_GENDER_AGE_YEAR <- dataPrevalenceByGenderAgeYear[dataPrevalenceByGenderAgeYear$CONCEPT_ID == concept_id,c(2,3,4,5)] + report$PREVALENCE_BY_MONTH <- dataPrevalenceByMonth[dataPrevalenceByMonth$CONCEPT_ID == concept_id,c(2,3)] + report$LENGTH_OF_ERA <- dataLengthOfEra[dataLengthOfEra$CONCEPT_ID == concept_id,c(2,3,4,5,6,7,8,9)] + report$AGE_AT_FIRST_DIAGNOSIS <- dataAgeAtFirstDiagnosis[dataAgeAtFirstDiagnosis$CONCEPT_ID == concept_id,c(2,3,4,5,6,7,8,9)] + filename <- paste(outputPath, "/conditioneras/condition_" , concept_id , ".json", 
sep='') + + write(rjson::toJSON(report,method="C"),filename) + + #Update progressbar: + env <- parent.env(environment()) + curVal <- get("progress", envir = env) + assign("progress", curVal +1 ,envir= env) + utils::setTxtProgressBar(get("progressBar", envir= env), (curVal + 1) / get("totalCount", envir= env)) + } + + dummy <- lapply(uniqueConcepts, buildConditionEraReport) + + utils::setTxtProgressBar(progressBar, 1) + close(progressBar) +} + +generateDrugEraReports <- function(conn, dbms, cdmDatabaseSchema, resultsDatabaseSchema, outputPath, vocabDatabaseSchema = cdmDatabaseSchema) { + writeLines("Generating drug era reports") + + + treemapFile <- file.path(outputPath,"drugera_treemap.json") + if (!file.exists(treemapFile)){ + writeLines(paste("Warning: treemap file",treemapFile,"does not exist. Skipping detail report generation.")) + return() + } + + treemapData <- rjson::fromJSON(file = treemapFile) + uniqueConcepts <- unique(treemapData$CONCEPT_ID) + totalCount <- length(uniqueConcepts) + + + drugerasFolder <- file.path(outputPath,"drugeras") + if (file.exists(drugerasFolder)){ + writeLines(paste("Warning: folder ",drugerasFolder," already exists")) + } else { + dir.create(paste(drugerasFolder,"/",sep="")) + } + + progressBar <- utils::txtProgressBar(style=3) + progress = 0 + + queryAgeAtFirstExposure <- SqlRender::loadRenderTranslateSql(sqlFilename = "export/drugera/sqlAgeAtFirstExposure.sql", + packageName = "Achilles", + dbms = dbms, + warnOnMissingParameters = FALSE, + cdm_database_schema = cdmDatabaseSchema, + results_database_schema = resultsDatabaseSchema, + vocab_database_schema = vocabDatabaseSchema + ) + + queryPrevalenceByGenderAgeYear <- SqlRender::loadRenderTranslateSql(sqlFilename = "export/drugera/sqlPrevalenceByGenderAgeYear.sql", + packageName = "Achilles", + dbms = dbms, + warnOnMissingParameters = FALSE, + cdm_database_schema = cdmDatabaseSchema, + results_database_schema = resultsDatabaseSchema, + vocab_database_schema = vocabDatabaseSchema + ) + + queryPrevalenceByMonth <- SqlRender::loadRenderTranslateSql(sqlFilename = "export/drugera/sqlPrevalenceByMonth.sql", + packageName = "Achilles", + dbms = dbms, + warnOnMissingParameters = FALSE, + cdm_database_schema = cdmDatabaseSchema, + results_database_schema = resultsDatabaseSchema, + vocab_database_schema = vocabDatabaseSchema + ) + + queryLengthOfEra <- SqlRender::loadRenderTranslateSql(sqlFilename = "export/drugera/sqlLengthOfEra.sql", + packageName = "Achilles", + dbms = dbms, + warnOnMissingParameters = FALSE, + cdm_database_schema = cdmDatabaseSchema, + results_database_schema = resultsDatabaseSchema, + vocab_database_schema = vocabDatabaseSchema + ) + + dataAgeAtFirstExposure <- DatabaseConnector::querySql(conn,queryAgeAtFirstExposure) + dataPrevalenceByGenderAgeYear <- DatabaseConnector::querySql(conn,queryPrevalenceByGenderAgeYear) + dataPrevalenceByMonth <- DatabaseConnector::querySql(conn,queryPrevalenceByMonth) + dataLengthOfEra <- DatabaseConnector::querySql(conn,queryLengthOfEra) + + buildDrugEraReport <- function(concept_id) { + report <- {} + report$AGE_AT_FIRST_EXPOSURE <- dataAgeAtFirstExposure[dataAgeAtFirstExposure$CONCEPT_ID == concept_id,c(2,3,4,5,6,7,8,9)] + report$PREVALENCE_BY_GENDER_AGE_YEAR <- dataPrevalenceByGenderAgeYear[dataPrevalenceByGenderAgeYear$CONCEPT_ID == concept_id,c(2,3,4,5)] + report$PREVALENCE_BY_MONTH <- dataPrevalenceByMonth[dataPrevalenceByMonth$CONCEPT_ID == concept_id,c(2,3)] + report$LENGTH_OF_ERA <- dataLengthOfEra[dataLengthOfEra$CONCEPT_ID == concept_id, 
c(2,3,4,5,6,7,8,9)] + + filename <- paste(outputPath, "/drugeras/drug_" , concept_id , ".json", sep='') + + write(rjson::toJSON(report,method="C"),filename) + + #Update progressbar: + env <- parent.env(environment()) + curVal <- get("progress", envir = env) + assign("progress", curVal +1 ,envir= env) + utils::setTxtProgressBar(get("progressBar", envir= env), (curVal + 1) / get("totalCount", envir= env)) + } + + dummy <- lapply(uniqueConcepts, buildDrugEraReport) + + utils::setTxtProgressBar(progressBar, 1) + close(progressBar) +} + +generateDrugReports <- function(conn, dbms, cdmDatabaseSchema, resultsDatabaseSchema, outputPath, vocabDatabaseSchema = cdmDatabaseSchema) { + writeLines("Generating drug reports") + + treemapFile <- file.path(outputPath,"drug_treemap.json") + if (!file.exists(treemapFile)){ + writeLines(paste("Warning: treemap file",treemapFile,"does not exist. Skipping detail report generation.")) + return() + } + + treemapData <- rjson::fromJSON(file = treemapFile) + uniqueConcepts <- unique(treemapData$CONCEPT_ID) + totalCount <- length(uniqueConcepts) + + drugsFolder <- file.path(outputPath,"drugs") + if (file.exists(drugsFolder)){ + writeLines(paste("Warning: folder ",drugsFolder," already exists")) + } else { + dir.create(paste(drugsFolder,"/",sep="")) + } + + progressBar <- utils::txtProgressBar(style=3) + progress = 0 + + queryAgeAtFirstExposure <- SqlRender::loadRenderTranslateSql(sqlFilename = "export/drug/sqlAgeAtFirstExposure.sql", + packageName = "Achilles", + dbms = dbms, + warnOnMissingParameters = FALSE, + cdm_database_schema = cdmDatabaseSchema, + results_database_schema = resultsDatabaseSchema, + vocab_database_schema = vocabDatabaseSchema + ) + + queryDaysSupplyDistribution <- SqlRender::loadRenderTranslateSql(sqlFilename = "export/drug/sqlDaysSupplyDistribution.sql", + packageName = "Achilles", + dbms = dbms, + warnOnMissingParameters = FALSE, + cdm_database_schema = cdmDatabaseSchema, + results_database_schema = resultsDatabaseSchema, + vocab_database_schema = vocabDatabaseSchema + ) + + queryDrugsByType <- SqlRender::loadRenderTranslateSql(sqlFilename = "export/drug/sqlDrugsByType.sql", + packageName = "Achilles", + dbms = dbms, + warnOnMissingParameters = FALSE, + cdm_database_schema = cdmDatabaseSchema, + results_database_schema = resultsDatabaseSchema, + vocab_database_schema = vocabDatabaseSchema + ) + + queryPrevalenceByGenderAgeYear <- SqlRender::loadRenderTranslateSql(sqlFilename = "export/drug/sqlPrevalenceByGenderAgeYear.sql", + packageName = "Achilles", + dbms = dbms, + warnOnMissingParameters = FALSE, + cdm_database_schema = cdmDatabaseSchema, + results_database_schema = resultsDatabaseSchema, + vocab_database_schema = vocabDatabaseSchema + ) + + queryPrevalenceByMonth <- SqlRender::loadRenderTranslateSql(sqlFilename = "export/drug/sqlPrevalenceByMonth.sql", + packageName = "Achilles", + dbms = dbms, + warnOnMissingParameters = FALSE, + cdm_database_schema = cdmDatabaseSchema, + results_database_schema = resultsDatabaseSchema, + vocab_database_schema = vocabDatabaseSchema + ) + + queryDrugFrequencyDistribution <- SqlRender::loadRenderTranslateSql(sqlFilename = "export/drug/sqlFrequencyDistribution.sql", + packageName = "Achilles", + dbms = dbms, + warnOnMissingParameters = FALSE, + cdm_database_schema = cdmDatabaseSchema, + results_database_schema = resultsDatabaseSchema, + vocab_database_schema = vocabDatabaseSchema + ) + + queryQuantityDistribution <- SqlRender::loadRenderTranslateSql(sqlFilename = "export/drug/sqlQuantityDistribution.sql", + 
packageName = "Achilles", + dbms = dbms, + warnOnMissingParameters = FALSE, + cdm_database_schema = cdmDatabaseSchema, + results_database_schema = resultsDatabaseSchema, + vocab_database_schema = vocabDatabaseSchema + ) + + queryRefillsDistribution <- SqlRender::loadRenderTranslateSql(sqlFilename = "export/drug/sqlRefillsDistribution.sql", + packageName = "Achilles", + dbms = dbms, + warnOnMissingParameters = FALSE, + cdm_database_schema = cdmDatabaseSchema, + results_database_schema = resultsDatabaseSchema, + vocab_database_schema = vocabDatabaseSchema + ) + + dataAgeAtFirstExposure <- DatabaseConnector::querySql(conn,queryAgeAtFirstExposure) + dataDaysSupplyDistribution <- DatabaseConnector::querySql(conn,queryDaysSupplyDistribution) + dataDrugsByType <- DatabaseConnector::querySql(conn,queryDrugsByType) + dataPrevalenceByGenderAgeYear <- DatabaseConnector::querySql(conn,queryPrevalenceByGenderAgeYear) + dataPrevalenceByMonth <- DatabaseConnector::querySql(conn,queryPrevalenceByMonth) + dataQuantityDistribution <- DatabaseConnector::querySql(conn,queryQuantityDistribution) + dataRefillsDistribution <- DatabaseConnector::querySql(conn,queryRefillsDistribution) + dataDrugFrequencyDistribution <- DatabaseConnector::querySql(conn,queryDrugFrequencyDistribution) + + buildDrugReport <- function(concept_id) { + report <- {} + report$AGE_AT_FIRST_EXPOSURE <- dataAgeAtFirstExposure[dataAgeAtFirstExposure$DRUG_CONCEPT_ID == concept_id,c(2,3,4,5,6,7,8,9)] + report$DAYS_SUPPLY_DISTRIBUTION <- dataDaysSupplyDistribution[dataDaysSupplyDistribution$DRUG_CONCEPT_ID == concept_id, c(2,3,4,5,6,7,8,9)] + report$DRUGS_BY_TYPE <- dataDrugsByType[dataDrugsByType$DRUG_CONCEPT_ID == concept_id, c(3,4)] + report$PREVALENCE_BY_GENDER_AGE_YEAR <- dataPrevalenceByGenderAgeYear[dataPrevalenceByGenderAgeYear$CONCEPT_ID == concept_id,c(3,4,5,6)] + report$PREVALENCE_BY_MONTH <- dataPrevalenceByMonth[dataPrevalenceByMonth$CONCEPT_ID == concept_id,c(3,4)] + report$DRUG_FREQUENCY_DISTRIBUTION <- dataDrugFrequencyDistribution[dataDrugFrequencyDistribution$CONCEPT_ID == concept_id,c(3,4)] + report$QUANTITY_DISTRIBUTION <- dataQuantityDistribution[dataQuantityDistribution$DRUG_CONCEPT_ID == concept_id, c(2,3,4,5,6,7,8,9)] + report$REFILLS_DISTRIBUTION <- dataRefillsDistribution[dataRefillsDistribution$DRUG_CONCEPT_ID == concept_id, c(2,3,4,5,6,7,8,9)] + + filename <- paste(outputPath, "/drugs/drug_" , concept_id , ".json", sep='') + + write(rjson::toJSON(report,method="C"),filename) + + #Update progressbar: + env <- parent.env(environment()) + curVal <- get("progress", envir = env) + assign("progress", curVal +1 ,envir= env) + utils::setTxtProgressBar(get("progressBar", envir= env), (curVal + 1) / get("totalCount", envir= env)) + } + + dummy <- lapply(uniqueConcepts, buildDrugReport) + + utils::setTxtProgressBar(progressBar, 1) + close(progressBar) +} + +generateProcedureTreemap <- function(conn, dbms, cdmDatabaseSchema, resultsDatabaseSchema, outputPath, vocabDatabaseSchema = cdmDatabaseSchema) { + writeLines("Generating procedure treemap") + progressBar <- utils::txtProgressBar(max=1,style=3) + progress = 0 + + queryProcedureTreemap <- SqlRender::loadRenderTranslateSql(sqlFilename = "export/procedure/sqlProcedureTreemap.sql", + packageName = "Achilles", + dbms = dbms, + warnOnMissingParameters = FALSE, + cdm_database_schema = cdmDatabaseSchema, + results_database_schema = resultsDatabaseSchema, + vocab_database_schema = vocabDatabaseSchema + ) + + dataProcedureTreemap <- 
DatabaseConnector::querySql(conn,queryProcedureTreemap) + + write(rjson::toJSON(dataProcedureTreemap,method="C"),paste(outputPath, "/procedure_treemap.json", sep='')) + progress = progress + 1 + utils::setTxtProgressBar(progressBar, progress) + + close(progressBar) +} + +generateProcedureReports <- function(conn, dbms, cdmDatabaseSchema, resultsDatabaseSchema, outputPath, vocabDatabaseSchema = cdmDatabaseSchema) { + writeLines("Generating procedure reports") + + treemapFile <- file.path(outputPath,"procedure_treemap.json") + if (!file.exists(treemapFile)){ + writeLines(paste("Warning: treemap file",treemapFile,"does not exist. Skipping detail report generation.")) + return() + } + + treemapData <- rjson::fromJSON(file = treemapFile) + uniqueConcepts <- unique(treemapData$CONCEPT_ID) + totalCount <- length(uniqueConcepts) + + proceduresFolder <- file.path(outputPath,"procedures") + if (file.exists(proceduresFolder)){ + writeLines(paste("Warning: folder ",proceduresFolder," already exists")) + } else { + dir.create(paste(proceduresFolder,"/",sep="")) + + } + + progressBar <- utils::txtProgressBar(style=3) + progress = 0 + + queryPrevalenceByGenderAgeYear <- SqlRender::loadRenderTranslateSql(sqlFilename = "export/procedure/sqlPrevalenceByGenderAgeYear.sql", + packageName = "Achilles", + dbms = dbms, + warnOnMissingParameters = FALSE, + cdm_database_schema = cdmDatabaseSchema, + results_database_schema = resultsDatabaseSchema, + vocab_database_schema = vocabDatabaseSchema + ) + + queryPrevalenceByMonth <- SqlRender::loadRenderTranslateSql(sqlFilename = "export/procedure/sqlPrevalenceByMonth.sql", + packageName = "Achilles", + dbms = dbms, + warnOnMissingParameters = FALSE, + cdm_database_schema = cdmDatabaseSchema, + results_database_schema = resultsDatabaseSchema, + vocab_database_schema = vocabDatabaseSchema + ) + + queryProcedureFrequencyDistribution <- SqlRender::loadRenderTranslateSql(sqlFilename = "export/procedure/sqlFrequencyDistribution.sql", + packageName = "Achilles", + dbms = dbms, + warnOnMissingParameters = FALSE, + cdm_database_schema = cdmDatabaseSchema, + results_database_schema = resultsDatabaseSchema, + vocab_database_schema = vocabDatabaseSchema + ) + + queryProceduresByType <- SqlRender::loadRenderTranslateSql(sqlFilename = "export/procedure/sqlProceduresByType.sql", + packageName = "Achilles", + dbms = dbms, + warnOnMissingParameters = FALSE, + cdm_database_schema = cdmDatabaseSchema, + results_database_schema = resultsDatabaseSchema, + vocab_database_schema = vocabDatabaseSchema + ) + + queryAgeAtFirstOccurrence <- SqlRender::loadRenderTranslateSql(sqlFilename = "export/procedure/sqlAgeAtFirstOccurrence.sql", + packageName = "Achilles", + dbms = dbms, + warnOnMissingParameters = FALSE, + cdm_database_schema = cdmDatabaseSchema, + results_database_schema = resultsDatabaseSchema, + vocab_database_schema = vocabDatabaseSchema + ) + + dataPrevalenceByGenderAgeYear <- DatabaseConnector::querySql(conn,queryPrevalenceByGenderAgeYear) + dataPrevalenceByMonth <- DatabaseConnector::querySql(conn,queryPrevalenceByMonth) + dataProceduresByType <- DatabaseConnector::querySql(conn,queryProceduresByType) + dataAgeAtFirstOccurrence <- DatabaseConnector::querySql(conn,queryAgeAtFirstOccurrence) + dataProcedureFrequencyDistribution <- DatabaseConnector::querySql(conn,queryProcedureFrequencyDistribution) + + buildProcedureReport <- function(concept_id) { + report <- {} + report$PREVALENCE_BY_GENDER_AGE_YEAR <- dataPrevalenceByGenderAgeYear[dataPrevalenceByGenderAgeYear$CONCEPT_ID == 
concept_id,c(3,4,5,6)] + report$PREVALENCE_BY_MONTH <- dataPrevalenceByMonth[dataPrevalenceByMonth$CONCEPT_ID == concept_id,c(3,4)] + report$PROCEDURE_FREQUENCY_DISTRIBUTION <- dataProcedureFrequencyDistribution[dataProcedureFrequencyDistribution$CONCEPT_ID == concept_id,c(3,4)] + report$PROCEDURES_BY_TYPE <- dataProceduresByType[dataProceduresByType$PROCEDURE_CONCEPT_ID == concept_id,c(4,5)] + report$AGE_AT_FIRST_OCCURRENCE <- dataAgeAtFirstOccurrence[dataAgeAtFirstOccurrence$CONCEPT_ID == concept_id,c(2,3,4,5,6,7,8,9)] + filename <- paste(outputPath, "/procedures/procedure_" , concept_id , ".json", sep='') + + write(rjson::toJSON(report,method="C"),filename) + + #Update progressbar: + env <- parent.env(environment()) + curVal <- get("progress", envir = env) + assign("progress", curVal +1 ,envir= env) + utils::setTxtProgressBar(get("progressBar", envir= env), (curVal + 1) / get("totalCount", envir= env)) + } + + dummy <- lapply(uniqueConcepts, buildProcedureReport) + + utils::setTxtProgressBar(progressBar, 1) + close(progressBar) +} + +generatePersonReport <- function(conn, dbms, cdmDatabaseSchema, resultsDatabaseSchema, outputPath, vocabDatabaseSchema = cdmDatabaseSchema) +{ + writeLines("Generating person reports") + progressBar <- utils::txtProgressBar(max=7,style=3) + progress = 0 + output = {} + + # 1. Title: Population + # a. Visualization: Table + # b. Row #1: CDM source name + # c. Row #2: # of persons + + renderedSql <- SqlRender::loadRenderTranslateSql(sqlFilename = "export/person/population.sql", + packageName = "Achilles", + dbms = dbms, + warnOnMissingParameters = FALSE, + cdm_database_schema = cdmDatabaseSchema, + results_database_schema = resultsDatabaseSchema, + vocab_database_schema = vocabDatabaseSchema + ) + + personSummaryData <- DatabaseConnector::querySql(conn,renderedSql) + progress = progress + 1 + utils::setTxtProgressBar(progressBar, progress) + + output$SUMMARY = personSummaryData + + # 2. Title: Gender distribution + # a. Visualization: Pie + # b. Category: Gender + # c. Value: % of persons + + renderedSql <- SqlRender::loadRenderTranslateSql(sqlFilename = "export/person/gender.sql", + packageName = "Achilles", + dbms = dbms, + warnOnMissingParameters = FALSE, + cdm_database_schema = cdmDatabaseSchema, + results_database_schema = resultsDatabaseSchema, + vocab_database_schema = vocabDatabaseSchema + ) + genderData <- DatabaseConnector::querySql(conn,renderedSql) + progress = progress + 1 + utils::setTxtProgressBar(progressBar, progress) + + output$GENDER_DATA = genderData + + # 3. Title: Race distribution + # a. Visualization: Pie + # b. Category: Race + # c. Value: % of persons + + renderedSql <- SqlRender::loadRenderTranslateSql(sqlFilename = "export/person/race.sql", + packageName = "Achilles", + dbms = dbms, + warnOnMissingParameters = FALSE, + cdm_database_schema = cdmDatabaseSchema, + results_database_schema = resultsDatabaseSchema, + vocab_database_schema = vocabDatabaseSchema + ) + raceData <- DatabaseConnector::querySql(conn,renderedSql) + progress = progress + 1 + utils::setTxtProgressBar(progressBar, progress) + + output$RACE_DATA = raceData + + # 4. Title: Ethnicity distribution + # a. Visualization: Pie + # b. Category: Ethnicity + # c. 
Value: % of persons + + renderedSql <- SqlRender::loadRenderTranslateSql(sqlFilename = "export/person/ethnicity.sql", + packageName = "Achilles", + dbms = dbms, + warnOnMissingParameters = FALSE, + cdm_database_schema = cdmDatabaseSchema, + results_database_schema = resultsDatabaseSchema, + vocab_database_schema = vocabDatabaseSchema + ) + ethnicityData <- DatabaseConnector::querySql(conn,renderedSql) + progress = progress + 1 + utils::setTxtProgressBar(progressBar, progress) + + output$ETHNICITY_DATA = ethnicityData + + # 5. Title: Year of birth distribution + # a. Visualization: Histogram + # b. Category: Year of birth + # c. Value: # of persons + birthYearHist <- {} + + renderedSql <- SqlRender::loadRenderTranslateSql(sqlFilename = "export/person/yearofbirth_stats.sql", + packageName = "Achilles", + dbms = dbms, + warnOnMissingParameters = FALSE, + cdm_database_schema = cdmDatabaseSchema, + results_database_schema = resultsDatabaseSchema, + vocab_database_schema = vocabDatabaseSchema + ) + birthYearStats <- DatabaseConnector::querySql(conn,renderedSql) + progress = progress + 1 + utils::setTxtProgressBar(progressBar, progress) + + birthYearHist$MIN = birthYearStats$MIN_VALUE + birthYearHist$MAX = birthYearStats$MAX_VALUE + birthYearHist$INTERVAL_SIZE = birthYearStats$INTERVAL_SIZE + birthYearHist$INTERVALS = (birthYearStats$MAX_VALUE - birthYearStats$MIN_VALUE) / birthYearStats$INTERVAL_SIZE + + renderedSql <- SqlRender::loadRenderTranslateSql(sqlFilename = "export/person/yearofbirth_data.sql", + packageName = "Achilles", + dbms = dbms, + warnOnMissingParameters = FALSE, + cdm_database_schema = cdmDatabaseSchema, + results_database_schema = resultsDatabaseSchema, + vocab_database_schema = vocabDatabaseSchema + ) + birthYearData <- DatabaseConnector::querySql(conn,renderedSql) + progress = progress + 1 + utils::setTxtProgressBar(progressBar, progress) + + birthYearHist$DATA <- birthYearData + + output$BIRTH_YEAR_HISTOGRAM <- birthYearHist + + # Convert to JSON and save file result + jsonOutput = rjson::toJSON(output) + write(jsonOutput, file=paste(outputPath, "/person.json", sep="")) + progress = progress + 1 + utils::setTxtProgressBar(progressBar, progress) + + close(progressBar) +} + +generateObservationPeriodReport <- function(conn, dbms, cdmDatabaseSchema, resultsDatabaseSchema, outputPath, vocabDatabaseSchema = cdmDatabaseSchema) +{ + writeLines("Generating observation period reports") + progressBar <- utils::txtProgressBar(max=11,style=3) + progress = 0 + output = {} + + # 1. Title: Age at time of first observation + # a. Visualization: Histogram + # b. Category: Age + # c. 
Value: # of persons + + ageAtFirstObservationHist <- {} + + # stats are hard coded for this result to make x-axis consistent across datasources + ageAtFirstObservationHist$MIN = 0 + ageAtFirstObservationHist$MAX =100 + ageAtFirstObservationHist$INTERVAL_SIZE = 1 + ageAtFirstObservationHist$INTERVALS = 100 + + renderedSql <- SqlRender::loadRenderTranslateSql(sqlFilename = "export/observationperiod/ageatfirst.sql", + packageName = "Achilles", + dbms = dbms, + warnOnMissingParameters = FALSE, + cdm_database_schema = cdmDatabaseSchema, + results_database_schema = resultsDatabaseSchema, + vocab_database_schema = vocabDatabaseSchema + ) + ageAtFirstObservationData <- DatabaseConnector::querySql(conn,renderedSql) + progress = progress + 1 + utils::setTxtProgressBar(progressBar, progress) + ageAtFirstObservationHist$DATA = ageAtFirstObservationData + output$AGE_AT_FIRST_OBSERVATION_HISTOGRAM <- ageAtFirstObservationHist + + # 2. Title: Age by gender + # a. Visualization: Side-by-side boxplot + # b. Category: Gender + # c. Values: Min/25%/Median/95%/Max - age at time of first observation + + renderedSql <- SqlRender::loadRenderTranslateSql(sqlFilename = "export/observationperiod/agebygender.sql", + packageName = "Achilles", + dbms = dbms, + warnOnMissingParameters = FALSE, + cdm_database_schema = cdmDatabaseSchema, + results_database_schema = resultsDatabaseSchema, + vocab_database_schema = vocabDatabaseSchema + ) + ageByGenderData <- DatabaseConnector::querySql(conn,renderedSql) + progress = progress + 1 + utils::setTxtProgressBar(progressBar, progress) + output$AGE_BY_GENDER = ageByGenderData + + # 3. Title: Length of observation + # a. Visualization: bar + # b. Category: length of observation period, 30d increments + # c. Values: # of persons + + observationLengthHist <- {} + + renderedSql <- SqlRender::loadRenderTranslateSql(sqlFilename = "export/observationperiod/observationlength_stats.sql", + packageName = "Achilles", + dbms = dbms, + warnOnMissingParameters = FALSE, + cdm_database_schema = cdmDatabaseSchema, + results_database_schema = resultsDatabaseSchema, + vocab_database_schema = vocabDatabaseSchema + ) + + observationLengthStats <- DatabaseConnector::querySql(conn,renderedSql) + progress = progress + 1 + utils::setTxtProgressBar(progressBar, progress) + observationLengthHist$MIN = observationLengthStats$MIN_VALUE + observationLengthHist$MAX = observationLengthStats$MAX_VALUE + observationLengthHist$INTERVAL_SIZE = observationLengthStats$INTERVAL_SIZE + observationLengthHist$INTERVALS = (observationLengthStats$MAX_VALUE - observationLengthStats$MIN_VALUE) / observationLengthStats$INTERVAL_SIZE + + renderedSql <- SqlRender::loadRenderTranslateSql(sqlFilename = "export/observationperiod/observationlength_data.sql", + packageName = "Achilles", + dbms = dbms, + warnOnMissingParameters = FALSE, + cdm_database_schema = cdmDatabaseSchema, + results_database_schema = resultsDatabaseSchema, + vocab_database_schema = vocabDatabaseSchema + ) + observationLengthData <- DatabaseConnector::querySql(conn,renderedSql) + progress = progress + 1 + utils::setTxtProgressBar(progressBar, progress) + observationLengthHist$DATA <- observationLengthData + + output$OBSERVATION_LENGTH_HISTOGRAM = observationLengthHist + + # 4. Title: Cumulative duration of observation + # a. Visualization: scatterplot + # b. X-axis: length of observation period + # c. Y-axis: % of population observed + # d. 
Note: will look like a Kaplan-Meier ‘survival’ plot, but information is the same as shown in ‘length of observation’ barchart, just plotted as cumulative + + renderedSql <- SqlRender::loadRenderTranslateSql(sqlFilename = "export/observationperiod/cumulativeduration.sql", + packageName = "Achilles", + dbms = dbms, + warnOnMissingParameters = FALSE, + cdm_database_schema = cdmDatabaseSchema, + results_database_schema = resultsDatabaseSchema, + vocab_database_schema = vocabDatabaseSchema + ) + + cumulativeDurationData <- DatabaseConnector::querySql(conn,renderedSql) + progress = progress + 1 + utils::setTxtProgressBar(progressBar, progress) + output$CUMULATIVE_DURATION = cumulativeDurationData + + # 5. Title: Observation period length distribution, by gender + # a. Visualization: side-by-side boxplot + # b. Category: Gender + # c. Values: Min/25%/Median/95%/Max length of observation period + + renderedSql <- SqlRender::loadRenderTranslateSql(sqlFilename = "export/observationperiod/observationlengthbygender.sql", + packageName = "Achilles", + dbms = dbms, + warnOnMissingParameters = FALSE, + cdm_database_schema = cdmDatabaseSchema, + results_database_schema = resultsDatabaseSchema, + vocab_database_schema = vocabDatabaseSchema + ) + opLengthByGenderData <- DatabaseConnector::querySql(conn,renderedSql) + progress = progress + 1 + utils::setTxtProgressBar(progressBar, progress) + output$OBSERVATION_PERIOD_LENGTH_BY_GENDER = opLengthByGenderData + + # 6. Title: Observation period length distribution, by age + # a. Visualization: side-by-side boxplot + # b. Category: Age decile + # c. Values: Min/25%/Median/95%/Max length of observation period + + renderedSql <- SqlRender::loadRenderTranslateSql(sqlFilename = "export/observationperiod/observationlengthbyage.sql", + packageName = "Achilles", + dbms = dbms, + warnOnMissingParameters = FALSE, + cdm_database_schema = cdmDatabaseSchema, + results_database_schema = resultsDatabaseSchema, + vocab_database_schema = vocabDatabaseSchema + ) + opLengthByAgeData <- DatabaseConnector::querySql(conn,renderedSql) + progress = progress + 1 + utils::setTxtProgressBar(progressBar, progress) + output$OBSERVATION_PERIOD_LENGTH_BY_AGE = opLengthByAgeData + + # 7. Title: Number of persons with continuous observation by year + # a. Visualization: Histogram + # b. Category: Year + # c. 
Values: # of persons with continuous coverage + + observedByYearHist <- {} + renderedSql <- SqlRender::loadRenderTranslateSql(sqlFilename = "export/observationperiod/observedbyyear_stats.sql", + packageName = "Achilles", + dbms = dbms, + warnOnMissingParameters = FALSE, + cdm_database_schema = cdmDatabaseSchema, + results_database_schema = resultsDatabaseSchema, + vocab_database_schema = vocabDatabaseSchema + ) + observedByYearStats <- DatabaseConnector::querySql(conn,renderedSql) + progress = progress + 1 + utils::setTxtProgressBar(progressBar, progress) + observedByYearHist$MIN = observedByYearStats$MIN_VALUE + observedByYearHist$MAX = observedByYearStats$MAX_VALUE + observedByYearHist$INTERVAL_SIZE = observedByYearStats$INTERVAL_SIZE + observedByYearHist$INTERVALS = (observedByYearStats$MAX_VALUE - observedByYearStats$MIN_VALUE) / observedByYearStats$INTERVAL_SIZE + + renderedSql <- SqlRender::loadRenderTranslateSql(sqlFilename = "export/observationperiod/observedbyyear_data.sql", + packageName = "Achilles", + dbms = dbms, + warnOnMissingParameters = FALSE, + cdm_database_schema = cdmDatabaseSchema, + results_database_schema = resultsDatabaseSchema, + vocab_database_schema = vocabDatabaseSchema + ) + + observedByYearData <- DatabaseConnector::querySql(conn,renderedSql) + progress = progress + 1 + utils::setTxtProgressBar(progressBar, progress) + observedByYearHist$DATA <- observedByYearData + + output$OBSERVED_BY_YEAR_HISTOGRAM = observedByYearHist + + # 8. Title: Number of persons with continuous observation by month + # a. Visualization: Histogram + # b. Category: Month/year + # c. Values: # of persons with continuous coverage + + observedByMonth <- {} + + renderedSql <- SqlRender::loadRenderTranslateSql(sqlFilename = "export/observationperiod/observedbymonth.sql", + packageName = "Achilles", + dbms = dbms, + warnOnMissingParameters = FALSE, + cdm_database_schema = cdmDatabaseSchema, + results_database_schema = resultsDatabaseSchema, + vocab_database_schema = vocabDatabaseSchema + ) + observedByMonth <- DatabaseConnector::querySql(conn,renderedSql) + progress = progress + 1 + utils::setTxtProgressBar(progressBar, progress) + + output$OBSERVED_BY_MONTH = observedByMonth + + # 9. Title: Number of observation periods per person + # a. Visualization: Pie + # b. Category: Number of observation periods + # c. 
Values: # of persons + + renderedSql <- SqlRender::loadRenderTranslateSql(sqlFilename = "export/observationperiod/periodsperperson.sql", + packageName = "Achilles", + dbms = dbms, + warnOnMissingParameters = FALSE, + cdm_database_schema = cdmDatabaseSchema, + results_database_schema = resultsDatabaseSchema, + vocab_database_schema = vocabDatabaseSchema + ) + personPeriodsData <- DatabaseConnector::querySql(conn,renderedSql) + progress = progress + 1 + utils::setTxtProgressBar(progressBar, progress) + output$PERSON_PERIODS_DATA = personPeriodsData + + # Convert to JSON and save file result + jsonOutput = rjson::toJSON(output) + write(jsonOutput, file=paste(outputPath, "/observationperiod.json", sep="")) + close(progressBar) +} + +generateDashboardReport <- function(outputPath) +{ + writeLines("Generating dashboard report") + output <- {} + + progressBar <- utils::txtProgressBar(max=4,style=3) + progress = 0 + + progress = progress + 1 + utils::setTxtProgressBar(progressBar, progress) + + personReport <- rjson::fromJSON(file = paste(outputPath, "/person.json", sep="")) + output$SUMMARY <- personReport$SUMMARY + output$GENDER_DATA <- personReport$GENDER_DATA + + progress = progress + 1 + utils::setTxtProgressBar(progressBar, progress) + + opReport <- rjson::fromJSON(file = paste(outputPath, "/observationperiod.json", sep="")) + + output$AGE_AT_FIRST_OBSERVATION_HISTOGRAM = opReport$AGE_AT_FIRST_OBSERVATION_HISTOGRAM + output$CUMULATIVE_DURATION = opReport$CUMULATIVE_DURATION + output$OBSERVED_BY_MONTH = opReport$OBSERVED_BY_MONTH + + progress = progress + 1 + utils::setTxtProgressBar(progressBar, progress) + + jsonOutput = rjson::toJSON(output) + write(jsonOutput, file=paste(outputPath, "/dashboard.json", sep="")) + progress = progress + 1 + utils::setTxtProgressBar(progressBar, progress) + + close(progressBar) +} + +generateDataDensityReport <- function(conn, dbms,cdmDatabaseSchema, resultsDatabaseSchema, outputPath, vocabDatabaseSchema = cdmDatabaseSchema) +{ + writeLines("Generating data density reports") + progressBar <- utils::txtProgressBar(max=3,style=3) + progress = 0 + output = {} + + # 1. Title: Total records + # a. Visualization: scatterplot + # b. X-axis: month/year + # c. y-axis: records + # d. series: person, visit, condition, drug, procedure, observation + + renderedSql <- SqlRender::loadRenderTranslateSql(sqlFilename = "export/datadensity/totalrecords.sql", + packageName = "Achilles", + dbms = dbms, + warnOnMissingParameters = FALSE, + cdm_database_schema = cdmDatabaseSchema, + results_database_schema = resultsDatabaseSchema, + vocab_database_schema = vocabDatabaseSchema + ) + + totalRecordsData <- DatabaseConnector::querySql(conn,renderedSql) + progress = progress + 1 + utils::setTxtProgressBar(progressBar, progress) + output$TOTAL_RECORDS = totalRecordsData + + # 2. Title: Records per person + # a. Visualization: scatterplot + # b. X-axis: month/year + # c. y-axis: records/person + # d. series: person, visit, condition, drug, procedure, observation + + renderedSql <- SqlRender::loadRenderTranslateSql(sqlFilename = "export/datadensity/recordsperperson.sql", + packageName = "Achilles", + dbms = dbms, + warnOnMissingParameters = FALSE, + cdm_database_schema = cdmDatabaseSchema, + results_database_schema = resultsDatabaseSchema, + vocab_database_schema = vocabDatabaseSchema + ) + + recordsPerPerson <- DatabaseConnector::querySql(conn,renderedSql) + progress = progress + 1 + utils::setTxtProgressBar(progressBar, progress) + output$RECORDS_PER_PERSON = recordsPerPerson + + # 3. 
Title: Concepts per person + # a. Visualization: side-by-side boxplot + # b. Category: Condition/Drug/Procedure/Observation + # c. Values: Min/25%/Median/95%/Max number of distinct concepts per person + + renderedSql <- SqlRender::loadRenderTranslateSql(sqlFilename = "export/datadensity/conceptsperperson.sql", + packageName = "Achilles", + dbms = dbms, + warnOnMissingParameters = FALSE, + cdm_database_schema = cdmDatabaseSchema, + results_database_schema = resultsDatabaseSchema, + vocab_database_schema = vocabDatabaseSchema + ) + + conceptsPerPerson <- DatabaseConnector::querySql(conn,renderedSql) + progress = progress + 1 + utils::setTxtProgressBar(progressBar, progress) + output$CONCEPTS_PER_PERSON = conceptsPerPerson + + # Convert to JSON and save file result + jsonOutput = rjson::toJSON(output) + write(jsonOutput, file=paste(outputPath, "/datadensity.json", sep="")) + close(progressBar) + +} + +generateMeasurementTreemap <- function(conn, dbms, cdmDatabaseSchema, resultsDatabaseSchema, outputPath, vocabDatabaseSchema = cdmDatabaseSchema) { + writeLines("Generating measurement treemap") + progressBar <- utils::txtProgressBar(max=1,style=3) + progress = 0 + + queryMeasurementTreemap <- SqlRender::loadRenderTranslateSql(sqlFilename = "export/measurement/sqlMeasurementTreemap.sql", + packageName = "Achilles", + dbms = dbms, + warnOnMissingParameters = FALSE, + cdm_database_schema = cdmDatabaseSchema, + results_database_schema = resultsDatabaseSchema, + vocab_database_schema = vocabDatabaseSchema + ) + + dataMeasurementTreemap <- DatabaseConnector::querySql(conn,queryMeasurementTreemap) + + write(rjson::toJSON(dataMeasurementTreemap,method="C"),paste(outputPath, "/measurement_treemap.json", sep='')) + progress = progress + 1 + utils::setTxtProgressBar(progressBar, progress) + + close(progressBar) + +} + +generateMeasurementReports <- function(conn, dbms, cdmDatabaseSchema, resultsDatabaseSchema, outputPath, vocabDatabaseSchema = cdmDatabaseSchema) +{ + writeLines("Generating Measurement reports") + + treemapFile <- file.path(outputPath,"measurement_treemap.json") + if (!file.exists(treemapFile)){ + writeLines(paste("Warning: treemap file",treemapFile,"does not exist. 
Skipping detail report generation.")) + return() + } + + treemapData <- rjson::fromJSON(file = treemapFile) + uniqueConcepts <- unique(treemapData$CONCEPT_ID) + totalCount <- length(uniqueConcepts) + + measurementsFolder <- file.path(outputPath,"measurements") + if (file.exists(measurementsFolder)){ + writeLines(paste("Warning: folder ",measurementsFolder," already exists")) + } else { + dir.create(paste(measurementsFolder,"/",sep="")) + + } + + progressBar <- utils::txtProgressBar(style=3) + progress = 0 + + queryPrevalenceByGenderAgeYear <- SqlRender::loadRenderTranslateSql(sqlFilename = "export/measurement/sqlPrevalenceByGenderAgeYear.sql", + packageName = "Achilles", + dbms = dbms, + warnOnMissingParameters = FALSE, + cdm_database_schema = cdmDatabaseSchema, + results_database_schema = resultsDatabaseSchema, + vocab_database_schema = vocabDatabaseSchema + ) + + queryPrevalenceByMonth <- SqlRender::loadRenderTranslateSql(sqlFilename = "export/measurement/sqlPrevalenceByMonth.sql", + packageName = "Achilles", + dbms = dbms, + warnOnMissingParameters = FALSE, + cdm_database_schema = cdmDatabaseSchema, + results_database_schema = resultsDatabaseSchema, + vocab_database_schema = vocabDatabaseSchema + ) + + queryFrequencyDistribution <- SqlRender::loadRenderTranslateSql(sqlFilename = "export/measurement/sqlFrequencyDistribution.sql", + packageName = "Achilles", + dbms = dbms, + warnOnMissingParameters = FALSE, + cdm_database_schema = cdmDatabaseSchema, + results_database_schema = resultsDatabaseSchema, + vocab_database_schema = vocabDatabaseSchema + ) + + queryMeasurementsByType <- SqlRender::loadRenderTranslateSql(sqlFilename = "export/measurement/sqlMeasurementsByType.sql", + packageName = "Achilles", + dbms = dbms, + warnOnMissingParameters = FALSE, + cdm_database_schema = cdmDatabaseSchema, + results_database_schema = resultsDatabaseSchema, + vocab_database_schema = vocabDatabaseSchema + ) + + queryAgeAtFirstOccurrence <- SqlRender::loadRenderTranslateSql(sqlFilename = "export/measurement/sqlAgeAtFirstOccurrence.sql", + packageName = "Achilles", + dbms = dbms, + warnOnMissingParameters = FALSE, + cdm_database_schema = cdmDatabaseSchema, + results_database_schema = resultsDatabaseSchema, + vocab_database_schema = vocabDatabaseSchema + ) + + queryRecordsByUnit <- SqlRender::loadRenderTranslateSql(sqlFilename = "export/measurement/sqlRecordsByUnit.sql", + packageName = "Achilles", + dbms = dbms, + warnOnMissingParameters = FALSE, + cdm_database_schema = cdmDatabaseSchema, + results_database_schema = resultsDatabaseSchema, + vocab_database_schema = vocabDatabaseSchema + ) + + queryMeasurementValueDistribution <- SqlRender::loadRenderTranslateSql(sqlFilename = "export/measurement/sqlMeasurementValueDistribution.sql", + packageName = "Achilles", + dbms = dbms, + warnOnMissingParameters = FALSE, + cdm_database_schema = cdmDatabaseSchema, + results_database_schema = resultsDatabaseSchema, + vocab_database_schema = vocabDatabaseSchema + ) + + queryLowerLimitDistribution <- SqlRender::loadRenderTranslateSql(sqlFilename = "export/measurement/sqlLowerLimitDistribution.sql", + packageName = "Achilles", + dbms = dbms, + warnOnMissingParameters = FALSE, + cdm_database_schema = cdmDatabaseSchema, + results_database_schema = resultsDatabaseSchema, + vocab_database_schema = vocabDatabaseSchema + ) + + queryUpperLimitDistribution <- SqlRender::loadRenderTranslateSql(sqlFilename = "export/measurement/sqlUpperLimitDistribution.sql", + packageName = "Achilles", + dbms = dbms, + warnOnMissingParameters = 
FALSE, + cdm_database_schema = cdmDatabaseSchema, + results_database_schema = resultsDatabaseSchema, + vocab_database_schema = vocabDatabaseSchema + ) + + queryValuesRelativeToNorm <- SqlRender::loadRenderTranslateSql(sqlFilename = "export/measurement/sqlValuesRelativeToNorm.sql", + packageName = "Achilles", + dbms = dbms, + warnOnMissingParameters = FALSE, + cdm_database_schema = cdmDatabaseSchema, + results_database_schema = resultsDatabaseSchema, + vocab_database_schema = vocabDatabaseSchema + ) + + dataPrevalenceByGenderAgeYear <- DatabaseConnector::querySql(conn,queryPrevalenceByGenderAgeYear) + dataPrevalenceByMonth <- DatabaseConnector::querySql(conn,queryPrevalenceByMonth) + dataMeasurementsByType <- DatabaseConnector::querySql(conn,queryMeasurementsByType) + dataAgeAtFirstOccurrence <- DatabaseConnector::querySql(conn,queryAgeAtFirstOccurrence) + dataRecordsByUnit <- DatabaseConnector::querySql(conn,queryRecordsByUnit) + dataMeasurementValueDistribution <- DatabaseConnector::querySql(conn,queryMeasurementValueDistribution) + dataLowerLimitDistribution <- DatabaseConnector::querySql(conn,queryLowerLimitDistribution) + dataUpperLimitDistribution <- DatabaseConnector::querySql(conn,queryUpperLimitDistribution) + dataValuesRelativeToNorm <- DatabaseConnector::querySql(conn,queryValuesRelativeToNorm) + dataFrequencyDistribution <- DatabaseConnector::querySql(conn,queryFrequencyDistribution) + + buildMeasurementReport <- function(concept_id) { + report <- {} + report$PREVALENCE_BY_GENDER_AGE_YEAR <- dataPrevalenceByGenderAgeYear[dataPrevalenceByGenderAgeYear$CONCEPT_ID == concept_id,c(3,4,5,6)] + report$PREVALENCE_BY_MONTH <- dataPrevalenceByMonth[dataPrevalenceByMonth$CONCEPT_ID == concept_id,c(3,4)] + report$FREQUENCY_DISTRIBUTION <- dataFrequencyDistribution[dataFrequencyDistribution$CONCEPT_ID == concept_id,c(3,4)] + report$MEASUREMENTS_BY_TYPE <- dataMeasurementsByType[dataMeasurementsByType$MEASUREMENT_CONCEPT_ID == concept_id,c(4,5)] + report$AGE_AT_FIRST_OCCURRENCE <- dataAgeAtFirstOccurrence[dataAgeAtFirstOccurrence$CONCEPT_ID == concept_id,c(2,3,4,5,6,7,8,9)] + + report$RECORDS_BY_UNIT <- dataRecordsByUnit[dataRecordsByUnit$MEASUREMENT_CONCEPT_ID == concept_id,c(4,5)] + report$MEASUREMENT_VALUE_DISTRIBUTION <- dataMeasurementValueDistribution[dataMeasurementValueDistribution$CONCEPT_ID == concept_id,c(2,3,4,5,6,7,8,9)] + report$LOWER_LIMIT_DISTRIBUTION <- dataLowerLimitDistribution[dataLowerLimitDistribution$CONCEPT_ID == concept_id,c(2,3,4,5,6,7,8,9)] + report$UPPER_LIMIT_DISTRIBUTION <- dataUpperLimitDistribution[dataUpperLimitDistribution$CONCEPT_ID == concept_id,c(2,3,4,5,6,7,8,9)] + report$VALUES_RELATIVE_TO_NORM <- dataValuesRelativeToNorm[dataValuesRelativeToNorm$MEASUREMENT_CONCEPT_ID == concept_id,c(4,5)] + + filename <- paste(outputPath, "/measurements/measurement_" , concept_id , ".json", sep='') + + write(rjson::toJSON(report,method="C"),filename) + + #Update progressbar: + env <- parent.env(environment()) + curVal <- get("progress", envir = env) + assign("progress", curVal +1 ,envir= env) + utils::setTxtProgressBar(get("progressBar", envir= env), (curVal + 1) / get("totalCount", envir= env)) + } + + dummy <- lapply(uniqueConcepts, buildMeasurementReport) + + utils::setTxtProgressBar(progressBar, 1) + close(progressBar) + +} + +generateObservationTreemap <- function(conn, dbms, cdmDatabaseSchema, resultsDatabaseSchema, outputPath, vocabDatabaseSchema = cdmDatabaseSchema) { + writeLines("Generating observation treemap") + progressBar <- 
utils::txtProgressBar(max=1,style=3) + progress = 0 + + queryObservationTreemap <- SqlRender::loadRenderTranslateSql(sqlFilename = "export/observation/sqlObservationTreemap.sql", + packageName = "Achilles", + dbms = dbms, + warnOnMissingParameters = FALSE, + cdm_database_schema = cdmDatabaseSchema, + results_database_schema = resultsDatabaseSchema, + vocab_database_schema = vocabDatabaseSchema + ) + + dataObservationTreemap <- DatabaseConnector::querySql(conn,queryObservationTreemap) + + write(rjson::toJSON(dataObservationTreemap,method="C"),paste(outputPath, "/observation_treemap.json", sep='')) + progress = progress + 1 + utils::setTxtProgressBar(progressBar, progress) + + close(progressBar) + +} + +generateObservationReports <- function(conn, dbms, cdmDatabaseSchema, resultsDatabaseSchema, outputPath, vocabDatabaseSchema = cdmDatabaseSchema) +{ + writeLines("Generating Observation reports") + + treemapFile <- file.path(outputPath,"observation_treemap.json") + if (!file.exists(treemapFile)){ + writeLines(paste("Warning: treemap file",treemapFile,"does not exist. Skipping detail report generation.")) + return() + } + + treemapData <- rjson::fromJSON(file = treemapFile) + uniqueConcepts <- unique(treemapData$CONCEPT_ID) + totalCount <- length(uniqueConcepts) + + observationsFolder <- file.path(outputPath,"observations") + if (file.exists(observationsFolder)){ + writeLines(paste("Warning: folder ",observationsFolder," already exists")) + } else { + dir.create(paste(observationsFolder,"/",sep="")) + + } + + progressBar <- utils::txtProgressBar(style=3) + progress = 0 + + queryPrevalenceByGenderAgeYear <- SqlRender::loadRenderTranslateSql(sqlFilename = "export/observation/sqlPrevalenceByGenderAgeYear.sql", + packageName = "Achilles", + dbms = dbms, + warnOnMissingParameters = FALSE, + cdm_database_schema = cdmDatabaseSchema, + results_database_schema = resultsDatabaseSchema, + vocab_database_schema = vocabDatabaseSchema + ) + + queryPrevalenceByMonth <- SqlRender::loadRenderTranslateSql(sqlFilename = "export/observation/sqlPrevalenceByMonth.sql", + packageName = "Achilles", + dbms = dbms, + warnOnMissingParameters = FALSE, + cdm_database_schema = cdmDatabaseSchema, + results_database_schema = resultsDatabaseSchema, + vocab_database_schema = vocabDatabaseSchema + ) + + queryObsFrequencyDistribution <- SqlRender::loadRenderTranslateSql(sqlFilename = "export/observation/sqlFrequencyDistribution.sql", + packageName = "Achilles", + dbms = dbms, + warnOnMissingParameters = FALSE, + cdm_database_schema = cdmDatabaseSchema, + results_database_schema = resultsDatabaseSchema, + vocab_database_schema = vocabDatabaseSchema + ) + + queryObservationsByType <- SqlRender::loadRenderTranslateSql(sqlFilename = "export/observation/sqlObservationsByType.sql", + packageName = "Achilles", + dbms = dbms, + warnOnMissingParameters = FALSE, + cdm_database_schema = cdmDatabaseSchema, + results_database_schema = resultsDatabaseSchema, + vocab_database_schema = vocabDatabaseSchema + ) + + queryAgeAtFirstOccurrence <- SqlRender::loadRenderTranslateSql(sqlFilename = "export/observation/sqlAgeAtFirstOccurrence.sql", + packageName = "Achilles", + dbms = dbms, + warnOnMissingParameters = FALSE, + cdm_database_schema = cdmDatabaseSchema, + results_database_schema = resultsDatabaseSchema, + vocab_database_schema = vocabDatabaseSchema + ) + + + dataPrevalenceByGenderAgeYear <- DatabaseConnector::querySql(conn,queryPrevalenceByGenderAgeYear) + dataPrevalenceByMonth <- DatabaseConnector::querySql(conn,queryPrevalenceByMonth) + 
dataObservationsByType <- DatabaseConnector::querySql(conn,queryObservationsByType) + dataAgeAtFirstOccurrence <- DatabaseConnector::querySql(conn,queryAgeAtFirstOccurrence) + dataObsFrequencyDistribution <- DatabaseConnector::querySql(conn,queryObsFrequencyDistribution) + + + buildObservationReport <- function(concept_id) { + report <- {} + report$PREVALENCE_BY_GENDER_AGE_YEAR <- dataPrevalenceByGenderAgeYear[dataPrevalenceByGenderAgeYear$CONCEPT_ID == concept_id,c(3,4,5,6)] + report$PREVALENCE_BY_MONTH <- dataPrevalenceByMonth[dataPrevalenceByMonth$CONCEPT_ID == concept_id,c(3,4)] + report$OBS_FREQUENCY_DISTRIBUTION <- dataObsFrequencyDistribution[dataObsFrequencyDistribution$CONCEPT_ID == concept_id,c(3,4)] + report$OBSERVATIONS_BY_TYPE <- dataObservationsByType[dataObservationsByType$OBSERVATION_CONCEPT_ID == concept_id,c(4,5)] + report$AGE_AT_FIRST_OCCURRENCE <- dataAgeAtFirstOccurrence[dataAgeAtFirstOccurrence$CONCEPT_ID == concept_id,c(2,3,4,5,6,7,8,9)] + + + filename <- paste(outputPath, "/observations/observation_" , concept_id , ".json", sep='') + + write(rjson::toJSON(report,method="C"),filename) + + #Update progressbar: + env <- parent.env(environment()) + curVal <- get("progress", envir = env) + assign("progress", curVal +1 ,envir= env) + utils::setTxtProgressBar(get("progressBar", envir= env), (curVal + 1) / get("totalCount", envir= env)) + } + + dummy <- lapply(uniqueConcepts, buildObservationReport) + + utils::setTxtProgressBar(progressBar, 1) + close(progressBar) + +} + +generateVisitTreemap <- function(conn, dbms, cdmDatabaseSchema, resultsDatabaseSchema, outputPath, vocabDatabaseSchema = cdmDatabaseSchema){ + writeLines("Generating visit_occurrence treemap") + progressBar <- utils::txtProgressBar(max=1,style=3) + progress = 0 + + queryVisitTreemap <- SqlRender::loadRenderTranslateSql(sqlFilename = "export/visit/sqlVisitTreemap.sql", + packageName = "Achilles", + dbms = dbms, + warnOnMissingParameters = FALSE, + cdm_database_schema = cdmDatabaseSchema, + results_database_schema = resultsDatabaseSchema, + vocab_database_schema = vocabDatabaseSchema + ) + + dataVisitTreemap <- DatabaseConnector::querySql(conn,queryVisitTreemap) + + write(rjson::toJSON(dataVisitTreemap,method="C"),paste(outputPath, "/visit_treemap.json", sep='')) + progress = progress + 1 + utils::setTxtProgressBar(progressBar, progress) + + close(progressBar) +} + +generateVisitReports <- function(conn, dbms, cdmDatabaseSchema, resultsDatabaseSchema, outputPath, vocabDatabaseSchema = cdmDatabaseSchema){ + writeLines("Generating visit reports") + + treemapFile <- file.path(outputPath,"visit_treemap.json") + if (!file.exists(treemapFile)){ + writeLines(paste("Warning: treemap file",treemapFile,"does not exist. 
Skipping detail report generation.")) + return() + } + + treemapData <- rjson::fromJSON(file = treemapFile) + uniqueConcepts <- unique(treemapData$CONCEPT_ID) + totalCount <- length(uniqueConcepts) + + visitsFolder <- file.path(outputPath,"visits") + if (file.exists(visitsFolder)){ + writeLines(paste("Warning: folder ",visitsFolder," already exists")) + } else { + dir.create(paste(visitsFolder,"/",sep="")) + + } + + progressBar <- utils::txtProgressBar(style=3) + progress = 0 + + queryPrevalenceByGenderAgeYear <- SqlRender::loadRenderTranslateSql(sqlFilename = "export/visit/sqlPrevalenceByGenderAgeYear.sql", + packageName = "Achilles", + dbms = dbms, + warnOnMissingParameters = FALSE, + cdm_database_schema = cdmDatabaseSchema, + results_database_schema = resultsDatabaseSchema, + vocab_database_schema = vocabDatabaseSchema + ) + + queryPrevalenceByMonth <- SqlRender::loadRenderTranslateSql(sqlFilename = "export/visit/sqlPrevalenceByMonth.sql", + packageName = "Achilles", + dbms = dbms, + warnOnMissingParameters = FALSE, + cdm_database_schema = cdmDatabaseSchema, + results_database_schema = resultsDatabaseSchema, + vocab_database_schema = vocabDatabaseSchema + ) + + queryVisitDurationByType <- SqlRender::loadRenderTranslateSql(sqlFilename = "export/visit/sqlVisitDurationByType.sql", + packageName = "Achilles", + dbms = dbms, + warnOnMissingParameters = FALSE, + cdm_database_schema = cdmDatabaseSchema, + results_database_schema = resultsDatabaseSchema, + vocab_database_schema = vocabDatabaseSchema + ) + + queryAgeAtFirstOccurrence <- SqlRender::loadRenderTranslateSql(sqlFilename = "export/visit/sqlAgeAtFirstOccurrence.sql", + packageName = "Achilles", + dbms = dbms, + warnOnMissingParameters = FALSE, + cdm_database_schema = cdmDatabaseSchema, + results_database_schema = resultsDatabaseSchema, + vocab_database_schema = vocabDatabaseSchema + ) + + dataPrevalenceByGenderAgeYear <- DatabaseConnector::querySql(conn,queryPrevalenceByGenderAgeYear) + dataPrevalenceByMonth <- DatabaseConnector::querySql(conn,queryPrevalenceByMonth) + dataVisitDurationByType <- DatabaseConnector::querySql(conn,queryVisitDurationByType) + dataAgeAtFirstOccurrence <- DatabaseConnector::querySql(conn,queryAgeAtFirstOccurrence) + + buildVisitReport <- function(concept_id) { + report <- {} + report$PREVALENCE_BY_GENDER_AGE_YEAR <- dataPrevalenceByGenderAgeYear[dataPrevalenceByGenderAgeYear$CONCEPT_ID == concept_id,c(3,4,5,6)] + report$PREVALENCE_BY_MONTH <- dataPrevalenceByMonth[dataPrevalenceByMonth$CONCEPT_ID == concept_id,c(3,4)] + report$VISIT_DURATION_BY_TYPE <- dataVisitDurationByType[dataVisitDurationByType$CONCEPT_ID == concept_id,c(2,3,4,5,6,7,8,9)] + report$AGE_AT_FIRST_OCCURRENCE <- dataAgeAtFirstOccurrence[dataAgeAtFirstOccurrence$CONCEPT_ID == concept_id,c(2,3,4,5,6,7,8,9)] + filename <- paste(outputPath, "/visits/visit_" , concept_id , ".json", sep='') + + write(rjson::toJSON(report,method="C"),filename) + + #Update progressbar: + env <- parent.env(environment()) + curVal <- get("progress", envir = env) + assign("progress", curVal +1 ,envir= env) + utils::setTxtProgressBar(get("progressBar", envir= env), (curVal + 1) / get("totalCount", envir= env)) + } + + dummy <- lapply(uniqueConcepts, buildVisitReport) + + utils::setTxtProgressBar(progressBar, 1) + close(progressBar) +} + +generateDeathReports <- function(conn, dbms, cdmDatabaseSchema, resultsDatabaseSchema, outputPath, vocabDatabaseSchema = cdmDatabaseSchema){ + writeLines("Generating death reports") + progressBar <- utils::txtProgressBar(max=4,style=3) 
+ progress = 0 + output = {} + + # 1. Title: Prevalence drilldown, prevalence by gender, age, and year + # a. Visualization: trellis lineplot + # b. Trellis category: age decile + # c. X-axis: year + # d. y-axis: condition prevalence (% persons) + # e. series: male, female + + renderedSql <- SqlRender::loadRenderTranslateSql(sqlFilename = "export/death/sqlPrevalenceByGenderAgeYear.sql", + packageName = "Achilles", + dbms = dbms, + warnOnMissingParameters = FALSE, + cdm_database_schema = cdmDatabaseSchema, + results_database_schema = resultsDatabaseSchema, + vocab_database_schema = vocabDatabaseSchema + ) + + prevalenceByGenderAgeYearData <- DatabaseConnector::querySql(conn,renderedSql) + progress = progress + 1 + utils::setTxtProgressBar(progressBar, progress) + output$PREVALENCE_BY_GENDER_AGE_YEAR = prevalenceByGenderAgeYearData + + # 2. Title: Prevalence by month + # a. Visualization: scatterplot + # b. X-axis: month/year + # c. y-axis: % of persons + # d. Comment: plot to show seasonality + + renderedSql <- SqlRender::loadRenderTranslateSql(sqlFilename = "export/death/sqlPrevalenceByMonth.sql", + packageName = "Achilles", + dbms = dbms, + warnOnMissingParameters = FALSE, + cdm_database_schema = cdmDatabaseSchema, + results_database_schema = resultsDatabaseSchema, + vocab_database_schema = vocabDatabaseSchema + ) + + prevalenceByMonthData <- DatabaseConnector::querySql(conn,renderedSql) + progress = progress + 1 + utils::setTxtProgressBar(progressBar, progress) + output$PREVALENCE_BY_MONTH = prevalenceByMonthData + + # 3. Title: Death records by type + # a. Visualization: pie + # b. Category: death type + # c. value: % of records + + renderedSql <- SqlRender::loadRenderTranslateSql(sqlFilename = "export/death/sqlDeathByType.sql", + packageName = "Achilles", + dbms = dbms, + warnOnMissingParameters = FALSE, + cdm_database_schema = cdmDatabaseSchema, + results_database_schema = resultsDatabaseSchema, + vocab_database_schema = vocabDatabaseSchema + ) + + deathByTypeData <- DatabaseConnector::querySql(conn,renderedSql) + progress = progress + 1 + utils::setTxtProgressBar(progressBar, progress) + output$DEATH_BY_TYPE = deathByTypeData + + # 4. Title: Age at death + # a. Visualization: side-by-side boxplot + # b. Category: gender + # c. Values: Min/25%/Median/95%/Max as age at death + + renderedSql <- SqlRender::loadRenderTranslateSql(sqlFilename = "export/death/sqlAgeAtDeath.sql", + packageName = "Achilles", + dbms = dbms, + warnOnMissingParameters = FALSE, + cdm_database_schema = cdmDatabaseSchema, + results_database_schema = resultsDatabaseSchema, + vocab_database_schema = vocabDatabaseSchema + ) + + ageAtDeathData <- DatabaseConnector::querySql(conn,renderedSql) + progress = progress + 1 + utils::setTxtProgressBar(progressBar, progress) + output$AGE_AT_DEATH = ageAtDeathData + + # Convert to JSON and save file result + jsonOutput = rjson::toJSON(output) + write(jsonOutput, file=paste(outputPath, "/death.json", sep="")) + close(progressBar) +} diff --git a/README-developers.md b/README-developers.md new file mode 100644 index 00000000..1291852d --- /dev/null +++ b/README-developers.md @@ -0,0 +1,199 @@ +# Achilles Developer README + +If you are interested in adding or modifying Achilles/Heel analyses, this is the section for you. A few key design principles: + +1. All analyses are split into separate files. This allows us to parallelize them if possible by using the OhdsiRTools clusterApply function, which allows for the spawning of multiple threads to process multiple list items. 
In this case, we have a list of analysis file names to process, before merging all of the staging tables into the final permanent tables as the last step (a small illustrative sketch of this pattern appears at the end of this document).
+
+2. All analysis queries must be optimized for MPP systems by including a hashing hint. Generally, this is person_id or subject_id, or whichever field offers a useful index. Please refer to the DatabaseConnector package for more information.
+
+3. Main Achilles analyses (pre-computed aggregated stats about the data source) are stored in *inst/analyses*, Heel analyses (data quality checks about those aggregated stats) are stored in *inst/sql/sql_server/heels*, and export to JSON scripts are stored in *inst/sql/sql_server/exports*. Any post-processing activities such as index building and concept hierarchy table creation are stored in *inst/sql/sql_server/post_processing*.
+
+## Achilles Main Analyses
+
+* **inst/csv/schemas/schema_achilles_results.csv**: This file defines the schema for the main summary results.
+
+* **inst/csv/schemas/schema_achilles_results_dist.csv**: This file defines the schema for the main distributed results.
+
+* **inst/csv/achilles/achilles_analysis_details.csv**: This file outlines all of the main Achilles analyses, identified by analysis_id and analysis_name.
+
+    + ANALYSIS_ID field
+      - The identifier of each main Achilles analysis; this identifier corresponds to SQL file names.
+
+    + DISTRIBUTION field
+      - If the analysis provides distributed statistics, then DISTRIBUTION = 1
+      - If it provides basic summary statistics, then DISTRIBUTION = 0
+      - If it provides both, then DISTRIBUTION = -1
+
+    + COST field
+      - If the analysis is about the COST table, then COST = 1, else COST = 0
+
+    + DISTRIBUTED_FIELD field
+      - For cost analyses, the CDM field to analyze
+
+    + ANALYSIS_NAME field
+      - The full description of the analysis
+
+    + STRATUM_1_NAME, STRATUM_2_NAME, STRATUM_3_NAME, STRATUM_4_NAME, STRATUM_5_NAME fields
+      - CDM fields or conceptual values to stratify the analysis by
+
+* **inst/csv/achilles/achilles_cost_columns.csv**: This file defines the cost table field names per domain.
+
+    + OLD field
+      - For CDM v5.0, these cost columns will be used, along with the older drug_cost and procedure_cost tables
+
+    + CURRENT field
+      - For CDM v5.1+, these cost columns will be used, along with the unified COST table
+
+### How to Add a Main Achilles Analysis
+
+To add a new analysis, define it in *inst/csv/achilles/achilles_analysis_details.csv* and store it in a SQL file with the analysis_id as the name. The query must conform to either the achilles_results or achilles_results_dist schema. If you'd like the analysis to write to both tables, include all pertinent queries in the file, and give the DISTRIBUTION field a value of -1. Remember to provide a hashing hint for the query so that MPP systems can benefit from the performance gain.
+
+**Main Achilles Analysis SQL conventions**
+
+Follow the conventions of existing analyses: `select into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_ from @cdmDatabaseSchema.;`
+
+* The `@scratchDatabaseSchema` parameter refers to the schema that will hold the staging table for this analysis.
+
+* The `@schemaDelim` parameter refers to the delimiter used for this destination staging table (either 's_' or '.'). This is changed based upon whether the user is selecting single-threaded or multi-threaded mode.
+
+* The `@tempAchillesPrefix` parameter refers to the staging table prefix to use for each staging table ('tmpach' by default, or whatever the user provides).
+ +* The `@cdmDatabaseSchema` parameter refers to the schema that holds the CDM data. + +* Append the analysis_id number to the destination table so that the achilles function can find it. + +* In assigning the analysis_id, try to group the new analysis with other related analyses by referring to the *inst/csv/achilles/achilles_analysis_details.csv* file. + + +## Export to JSON + +* **inst/csv/export/all_reports.csv**: This file defines the standard reports to export when running the exportToJson function. + + + REPORT field + - The name of the report to export. Any new reports should be added to this column in order to be included as part of the default export. + + +## Achilles Heel Analyses + +* **inst/csv/heel/heel_rules_all.csv**: This file details all of the Heel data quality rules. These rules can be executed either in parallel or in serial. + + + RULE_ID field + - The identifier for the Heel rule; this is used to identify the SQL files for parallel heel_results and serial results_derived. For serial Heel analyses, the RULE_ID is the ordinal value for serial processing. + + + RULE_NAME field + - The name of the Heel rule + + + EXECUTION_TYPE field + - If the data quality query can be run in parallel, then "parallel." If it is dependent upon the results_derived or heel_results tables existing, then "serial." + + + DESTINATION_TABLE field + - Where will the Heel rule write results to: achilles_heel_results, achilles_results_derived, or both? + + + RULE_TYPE field + - The category of the rule; does it check data quality, some kind of error, or conformance to the CDM schema? + + + RULE_DESCRIPTION field + - The full description of the Heel rule + + + THRESHOLD field + - What is the threshold for the rule to throw an error, warning, or notification? + + + RULE_CLASSIFICATION field + - A category for the rule composition + + + RULE_SCOPE field + - For what population should this rule be applied? + + + LINKED_MEASURE field + - A foreign key to Achilles main or other Heel analyses + +* **inst/csv/heel/heel_results_derived_details.csv**: This file details the derived results found in the achilles_results_derived table. + + + QUERY_ID field + - The identifier of each derived Heel analysis; this identifier corresponds to SQL file names. + + + MEASURE_ID field + - The named key of each derived measure + + + NAME field + - The full name of the derived measure + + + STATISTIC_VALUE_NAME field + - The type of statistic (counts, percents, ratios) + + + STRATUM_1_NAME, STRATUM_2_NAME fields + - The values to stratify the measure by + + + DESCRIPTION field + - The full details about the derived measure. + + + ASSOCIATED_RULES field + - A foreign key to the Heel rules defined in the *inst/csv/heel/heel_rules_all.csv* file. + +* **inst/csv/heel/heel_rules_drilldown.csv**: This file details the queries for creating drilldown metrics based on higher level Heel results. + + + RULE_ID field + - The identifier for the Heel rule + + + LABEL field + - A brief description of the drilldown + + + DRILL_DOWN_TYPE field + - The source of the drilldown + + + LEVEL field + - The depth of the drilldown + + + DESCRIPTION field + - A detailed description of the drilldown + + + CODE field + - The SQL query to obtain the drilldown value + +### How to Add a Heel Analysis + +1. To add a new Heel analysis, first determine whether its results will reside in the achilles_heel_results table, the achilles_results_derived table, or both. + +2. Next, determine if the analysis depends on other analyses. 
If so, then it should go into the *inst/sql/sql_server/heels/serial* folder. If not, then it should go into the *inst/sql/sql_server/heels/parallel* folder. +3. Document the Heel analysis in the pertinent CSV files so that they are transparent and reachable by the achillesHeel function. The rule should be added to the *inst/csv/heel/heel_rules_all.csv* file; the rule_id will be important if this analysis needs to run in serial. If the rule includes derived details, add it to the *inst/csv/heel/heel_results_derived_details.csv* file; make sure to tag the rule_id in the ASSOCIATED_RULES field and create a new QUERY_ID to use as the SQL file name. If the rule includes a drilldown metric, include it in the *inst/csv/heel/heel_rules_drilldown.csv* file. + +4. Use the conventions below to write the SQL query. If the Heel analysis needs to run in serial, keep in mind the prerequisites for the new Heel analysis; does it rely upon the achilles_results_derived or achilles_heel_results tables at a specific stage of the achillesHeel execution? + +**Achilles Heel SQL Conventions** + +#### Parallel files + +Follow the conventions of existing Heel Results queries: `select into @scratchDatabaseSchema@schemaDelim@tempHeelPrefix_@heelName from @resultsDatabaseSchema.achilles_analysis` + +* The `@scratchDatabaseSchema` parameter refers to the schema that will hold the staging table for this analysis. + +* The `@schemaDelim` parameter refers to the delimiter used for this destination staging table (either 's_' or '.'). This is changed based upon whether the user is selecting single threaded or multi-threaded mode. + +* The `@tempHeelPrefix` parameter refers to the staging table prefix to use for each staging table ('tmpheel' by default, or whatever the user provides). + +* The `@resultsDatabaseSchema` parameter refers to the schema that holds the Achilles tables. + +* The `@heelName` parameter is used to uniquely identify the Heel result (and corresponds to the SQL file name). + + +#### Serial files: achilles_results_derived + +Follow the conventions of existing Heel Results queries: `select into #serial_rd_@rdNewId from #serial_rd_@rdOldId` + +* As these will be run in serial, there is no need to use permanent staging tables. Instead, we use temp staging tables that are then merged into the final permanent achilles_results_derived table. This is to ensure best performance from MPP database platforms. + +* The `@rdNewId` parameter refers to the serial file ID of the new achilles_results_derived analysis. The achillesHeel function will assign this based on the rule_id. + +* The `@rdOldId` parameter refers to the serial file ID of the previous achilles_results_derived analysis. The achillesHeel function will assign this based on the rule_id. + + +#### Serial files: achilles_heel_results + +Follow the conventions of existing Heel Results queries: `select into #serial_hr_@hrNewId from #serial_hr_@hrOldId` + +* As these will be run in serial, there is no need to use permanent staging tables. Instead, we use temp staging tables that are then merged into the final permanent achilles_heel_results table. This is to ensure best performance from MPP database platforms. + +* The `@hrNewId` parameter refers to the serial file ID of the new achilles_heel_results analysis. The achillesHeel function will assign this based on the rule_id. + +* The `@hrOldId` parameter refers to the serial file ID of the previous achilles_heel_results analysis. The achillesHeel function will assign this based on the rule_id. 
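+
+### Worked Example: Rendering and Running a Parallel Staging Query
+
+The sketch below is illustrative only and is not the package's actual driver code. It shows roughly how one parallel Heel staging file could be rendered with the parameters described above, and how a list of such files could be dispatched across threads with the OhdsiRTools cluster helpers mentioned in the design principles. The file names, thread count, and parameter values are hypothetical.
+
+```r
+library(Achilles)
+
+# connectionDetails is assumed to come from DatabaseConnector::createConnectionDetails()
+runHeelSqlFile <- function(sqlFilename, connectionDetails) {
+  # Render and translate one parallel Heel file; the parameter names follow the
+  # conventions above, but the values shown here are examples only.
+  sql <- SqlRender::loadRenderTranslateSql(sqlFilename = sqlFilename,
+                                           packageName = "Achilles",
+                                           dbms = connectionDetails$dbms,
+                                           warnOnMissingParameters = FALSE,
+                                           scratchDatabaseSchema = "scratch",
+                                           schemaDelim = ".",
+                                           tempHeelPrefix = "tmpheel",
+                                           heelName = tools::file_path_sans_ext(basename(sqlFilename)),
+                                           resultsDatabaseSchema = "results")
+  connection <- DatabaseConnector::connect(connectionDetails)
+  on.exit(DatabaseConnector::disconnect(connection))
+  DatabaseConnector::executeSql(connection, sql)
+}
+
+# Dispatch a (hypothetical) list of parallel Heel files over several threads.
+cluster <- OhdsiRTools::makeCluster(numberOfThreads = 4)
+OhdsiRTools::clusterApply(cluster,
+                          c("heels/parallel/example_rule_1.sql",
+                            "heels/parallel/example_rule_2.sql"),
+                          runHeelSqlFile,
+                          connectionDetails = connectionDetails)
+OhdsiRTools::stopCluster(cluster)
+```
+
+The real achilles and achillesHeel functions additionally merge the staging tables into the permanent results tables and clean up scratch tables; this sketch only illustrates the render-and-execute step.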
+ diff --git a/README-impala.md b/README-impala.md old mode 100644 new mode 100755 diff --git a/README.md b/README.md old mode 100644 new mode 100755 index 518fc637..eee9f47b --- a/README.md +++ b/README.md @@ -1,150 +1,214 @@ -Achilles -======== - -Automated Characterization of Health Information at Large-scale Longitudinal Evidence Systems (ACHILLES) - descriptive statistics about a OMOP CDM v4/v5 database - -Achilles consists of several parts: 1. precomputations (for database characterization) 2. Achilles Heel for data quality and 3. export feature for AchillesWeb - -Achilles Heel is activelly being developed for CDM v5 only. - -Getting Started -=============== -(Please review the [Achilles Wiki](https://github.com/OHDSI/Achilles/wiki/Additional-instructions-for-Linux) for specific details for Linux) - -1. Make sure you have your data in the OMOP CDM v4/v5 format (v4 link http://omop.org/cdm v5 link:http://www.ohdsi.org/web/wiki/doku.php?id=documentation:cdm). - -2. Make sure that you have Java installed. If you don't have Java already intalled on your computed (on most computers it already is installed), go to [java.com](http://java.com) to get the latest version. (If you have trouble building with rJava below, be sure on Windows that your Path variable includes the path to jvm.dll (Windows Button --> type "path" --> Edit Environmental Variables --> Edit PATH variable, add to end ;C:/Program Files/Java/jre/bin/server) or wherever it is on your system.) - -3. in R, use the following commands to install Achilles (if you have prior package installations of aony of these packages, you may need to first unistall them using the command remove.packages()). - - ```r - install.packages("devtools") - library(devtools) - install_github("ohdsi/SqlRender") - install_github("ohdsi/DatabaseConnector") - install_github("ohdsi/Achilles") - #install_github("OHDSI/Achilles",args="--no-multiarch") #to avoid Java 32 vs 64 issues - #install_github("OHDSI/OhdsiRTools@v1.3.0")#use a prior released version (to bypass fresh errors) - ``` - -4. To run the Achilles analysis, use the following commands in R: (use runCostAnalysis = F or runHeel = F if necessary) - - ```r - library(Achilles) - connectionDetails <- createConnectionDetails(dbms="redshift", server="server.com", user="secret", - password='secret', schema="cdm5_inst", port="5439") - achillesResults <- achilles(connectionDetails, cdmDatabaseSchema="cdm5_inst", - resultsDatabaseSchema="results", sourceName="My Source Name", - cdmVersion = "cdm version", vocabDatabaseSchema="vocabulary") - ``` - "cdm4_inst" cdmDatabaseSchema parmater, "results" resultsDatabaseSchema parameter, and "vocabulary" vocabDatabaseSchema are the names of the schemas holding the CDM data, targeted for result writing, and holding the Vocabulary data respectively. See the [DatabaseConnector](https://github.com/OHDSI/DatabaseConnector) package for details on settings the connection details for your database, for example by typing - - Execution of all Achilles pre-computations may take a long time. See notes.md file to find out how some analyses can be excluded to make the execution faster (excluding cost pre-computations) - ```r - ?createConnectionDetails - ``` - Currently "sql server", "oracle", "postgresql", and "redshift" are supported as dbms. - "cdmVersion" can be either 4 or 5 (note that some Achilles features are only implemented for version 5). - -5. 
To use [AchillesWeb](https://github.com/OHDSI/AchillesWeb) to explore the Achilles statistics, you must first export the statistics to JSON files: - ```r - exportToJson(connectionDetails, cdmDatabaseSchema = "cdm4_inst", resultsDatabaseSchema = "results", outputPath = "c:/myPath/AchillesExport", cdmVersion = "cdm version", vocabDatabaseSchema = "vocabulary") - ``` - -6. To run only Achilles Heel (component of Achilles), use the following command: - ```r - achillesHeel(connectionDetails, cdmDatabaseSchema = "cdm4_inst", resultsDatabaseSchema = "results", cdmVersion = "cdm version", vocabDatabaseSchema = "vocabulary") - ``` - -7. Possible optional additional steps: - -To see what errors were found (from within R), run `fetchAchillesHeelResults(connectionDetails,resultsDatabaseSchema)` - -To see a particular analysis, run `fetchAchillesAnalysisResults(connectionDetails,resultsDatabaseSchema,analysisId = 2)` - -To join data tables with some lookup (overview files), obtains those using commands below: - -To get description of analyses, run `getAnalysisDetails()`. - -To get description of derived measures, run `read.csv(system.file("csv","derived_analysis_details",package="Achilles"),as.is=T)` - -Similarly, for overview of rules, run -`read.csv(system.file("csv","achilles_rule.csv",package="Achilles"),as.is=T)` - -Also see [notes.md](extras/notes.md) for more information (in the extras folder). - - -Getting Started with Docker -=========================== -This is an alternative method for running Achilles that does not require R and Java installations, using a Docker container instead. - -1. Install [Docker](https://docs.docker.com/installation/) and [Docker Compose](https://docs.docker.com/compose/install/). - -2. Clone this repository with git (`git clone https://github.com/OHDSI/Achilles.git`) and make it your working directory (`cd Achilles`). - -3. Copy `env_vars.sample` to `env_vars` and fill in the variable definitions. The `ACHILLES_DB_URI` should be formatted as `://:@/`. - -4. Copy `docker-compose.yml.sample` to `docker-compose.yml` and fill in the data output directory. - -5. Build the docker image with `docker-compose build`. - -6. Run Achilles in the background with `docker-compose run -d achilles`. - -Alternatively, you can run it with one long command line, like in the following example: - -```bash -docker run \ - --rm \ - --net=host \ - -v "$(pwd)"/output:/opt/app/output \ - -e ACHILLES_SOURCE=DEFAULT \ - -e ACHILLES_DB_URI=postgresql://webapi:webapi@localhost:5432/ohdsi \ - -e ACHILLES_CDM_SCHEMA=cdm5 \ - -e ACHILLES_VOCAB_SCHEMA=cdm5 \ - -e ACHILLES_RES_SCHEMA=webapi \ - -e ACHILLES_CDM_VERSION=5 \ - -``` - -License -======= -Achilles is licensed under Apache License 2.0 - - -# Pre-computations - -Achilles has some compatibility with Data Quality initiatives of the Data Quality Collaborative (DQC; http://repository.edm-forum.org/dqc or GitHub https://github.com/orgs/DQCollaborative). For example, a harmonized set of data quality terms has been published by Khan at al. in 2016. - -What Achilles calls an *analysis* (a pre-computation for a given dataset), the term used by DQC would be *measure* - -Some Heel Rules take advantage of derived measures. A feature of Heel introduced since version 1.4. A *derived measure* is a result of an SQL query that takes Achilles analyses as input. It is simply a different view of the precomputations that has some advantage to be materialized. The logic for computing a derived measures can be viewed in the `AchillesHeel_v5.sql` file. 
-
-Overview of derived measures can be seen in file `derived_analysis_details.csv`.
-
-For possible future flexible setting of Achilles Heel rule thresholds, some Heel rules are split into two phase approach. First, a derived measure is computed and the result is stored in a separate table `ACHILLES_RESULTS_DERIVED`. A Heel rule logic is than made simpler by a simple comparison whether a derived measure is over a threshold. A link between which rules use which pre-computation is available in file `inst\csv\achilles_rule.csv` (see column `linked_measure`).
-
-
-# Heel Rules
-
-Rules are classified into `CDM conformance` rules and `DQ` rules (see column `rule_type` in the rule CSV file).
-
-
-Some Heel rules can be generalized to non-OMOP datasets. Other rules are dependant on OMOP concept ids and a translation of the code to other CDMs would be needed (for example rule with `rule_id` of `29` uses OMOP specific concept;concept 195075).
-
-Rules that have in their name a prefix `[GeneralPopulationOnly]` are applicable to datasets that represent a general population. Once metadata for this parameter is implemented by OHDSI, their execution can be limited to such datasets. In the meantime, users should ignore output of rules that are meant for general population if their dataset is not of that type.
-
-Rules are classified into: error, warning and notification (see column `severity`).
-
-
-Development
-===========
-Achilles is being developed in R Studio.
-
-### Development status
-[![Build Status](https://travis-ci.org/OHDSI/Achilles.svg?branch=master)](https://travis-ci.org/OHDSI/Achilles)
-[![codecov.io](https://codecov.io/github/OHDSI/Achilles/coverage.svg?branch=master)](https://codecov.io/github/OHDSI/Achilles?branch=master)
-
-
-
-# Acknowledgements
-- This project is supported in part through the National Science Foundation grant IIS 1251151.
+Achilles
+========
+
+Automated Characterization of Health Information at Large-scale Longitudinal Evidence Systems (ACHILLES) - descriptive statistics and data quality checks on an OMOP CDM v5 database
+
+Achilles consists of several parts:
+1. Precomputations (for database characterization)
+2. Achilles Heel for data quality
+3. Export feature for AchillesWeb (or, Atlas Data Sources can read the Achilles tables directly)
+4. Index generation for better performance with Atlas Data Sources
+
+Achilles is actively being developed for CDM v5.x only.
+
+Getting Started
+===============
+(Please review the [Achilles Wiki](https://github.com/OHDSI/Achilles/wiki/Additional-instructions-for-Linux) for specific details for Linux)
+
+1. Make sure you have your data in the OMOP CDM v5.x format (https://github.com/OHDSI/CommonDataModel).
+
+2. Make sure that you have Java installed. If you don't have Java already installed on your computer (on most computers it already is installed), go to [java.com](http://java.com) to get the latest version. If you have trouble building with rJava below, be sure on Windows that your Path variable includes the path to jvm.dll:
+
+```Windows Button and R --> type "sysdm.cpl" --> Advanced tab --> Environment Variables button --> Edit PATH variable, and then add to the end your Java Path (e.g. ';C:/Program Files/Java/jre/bin/server')```
+
+3. In R, use the following commands to install Achilles (if you have prior package installations of any of these packages, you may need to first uninstall them using the command remove.packages()).
+
+   ```r
+   install.packages("devtools")
+   library(devtools)
+   install_github("OHDSI/SqlRender")
+   install_github("OHDSI/DatabaseConnector")
+   install_github("OHDSI/Achilles")
+   #devtools::install_github("OHDSI/Achilles@*release") #to install latest release (if master contains a bug for you)
+   #install_github("OHDSI/Achilles", args="--no-multiarch") #to avoid Java 32 vs 64 issues
+   ```
+
+4. To run the Achilles analysis, first determine whether you'd like to run the function in multi-threaded mode or in single-threaded mode. Use 'runCostAnalysis = FALSE' to save on execution time, as cost analyses tend to run long.
+
+**In multi-threaded mode**
+
+The analyses are run in multiple SQL sessions, which can be set using the 'numThreads' setting and by setting scratchDatabaseSchema to something other than '#'. For example, 10 threads means 10 independent SQL sessions. Intermediate results are written to scratch tables before finally being combined into the final results tables. Scratch tables are permanent tables; you can either choose to have Achilles drop these tables ('dropScratchTables = TRUE') or you can drop them at a later time ('dropScratchTables = FALSE'). Dropping the scratch tables can add time to the full execution. If desired, you can set your own custom prefix for all Achilles analysis scratch tables (tempAchillesPrefix) and/or for all Achilles Heel scratch tables (tempHeelPrefix).
+
+**In single-threaded mode**
+
+The analyses are run in one SQL session and all intermediate results are written to temp tables before finally being combined into the final results tables. Temp tables are dropped once the package is finished running. Single-threaded mode can be invoked by setting either 'numThreads = 1' or 'scratchDatabaseSchema = #'.
+
+
+Then, use the following commands in R:
+
+   ```r
+   library(Achilles)
+   connectionDetails <- createConnectionDetails(
+     dbms = "redshift",
+     server = "server.com",
+     user = "secret",
+     password = 'secret',
+     port = "5439")
+   ```
+
+ **Single-threaded mode**
+
+   ```r
+   achilles(connectionDetails,
+            cdmDatabaseSchema = "cdm5_inst",
+            resultsDatabaseSchema = "results",
+            vocabDatabaseSchema = "vocab",
+            numThreads = 1,
+            sourceName = "My Source Name",
+            cdmVersion = "5.3.0",
+            runHeel = TRUE,
+            runCostAnalysis = TRUE)
+   ```
+ **Multi-threaded mode**
+
+   ```r
+   achilles(connectionDetails,
+            cdmDatabaseSchema = "cdm5_inst",
+            resultsDatabaseSchema = "results",
+            scratchDatabaseSchema = "scratch",
+            vocabDatabaseSchema = "vocab",
+            numThreads = 10,
+            sourceName = "My Source Name",
+            cdmVersion = "5.3.0",
+            runHeel = TRUE,
+            runCostAnalysis = TRUE)
+   ```
+
+The "cdm5_inst" cdmDatabaseSchema parameter, "results" resultsDatabaseSchema parameter, and "scratch" scratchDatabaseSchema parameter are the fully qualified names of the schemas holding the CDM data, targeted for result writing, and holding the intermediate scratch tables, respectively. See the [DatabaseConnector](https://github.com/OHDSI/DatabaseConnector) package for details on setting the connection details for your database, for example by typing
+
+   ```r
+   ?createConnectionDetails
+   ```
+
+Execution of all Achilles pre-computations may take a long time, particularly in single-threaded mode and with COST analyses enabled. See the notes.md file to find out how some analyses can be excluded to make the execution faster (for example, excluding cost pre-computations).
+
+Currently "sql server", "pdw", "oracle", "postgresql", "redshift", "mysql", "impala", and "bigquery" are supported as dbms.
"cdmVersion" can be *ONLY* 5.x (please look at prior commit history for v4 support). + +5. To use [AchillesWeb](https://github.com/OHDSI/AchillesWeb) to explore the Achilles statistics, you must first export the statistics to a folder JSON files, which can optionally be compressed into one gzipped file for easier transportability. + ```r + exportToJson(connectionDetails, + cdmDatabaseSchema = "cdm5_inst", + resultsDatabaseSchema = "results", + outputPath = "c:/myPath/AchillesExport", + cdmVersion = "5.3.0", + compressIntoOneFile = TRUE # creates gzipped file of all JSON files) + ``` + +6. To run only Achilles Heel (component of Achilles), use the following command: + ```r + achillesHeel(connectionDetails, + cdmDatabaseSchema = "cdm5_inst", + resultsDatabaseSchema = "results", + scratchDatabaseSchema = "scratch", + numThreads = 10, # multi-threaded mode + cdmVersion = "5.3.0") + ``` + +7. Possible optional additional steps: + + - To see what errors were found (from within R), run `fetchAchillesHeelResults(connectionDetails,resultsDatabaseSchema)` + + - To see a particular analysis, run `fetchAchillesAnalysisResults(connectionDetails,resultsDatabaseSchema,analysisId = 2)` + + - To join data tables with some lookup (overview files), obtains those using commands below: + + - To get description of analyses, run `getAnalysisDetails()`. + + - To get description of derived measures, run `read.csv(system.file("csv","derived_analysis_details",package="Achilles"),as.is=T)` + + - Similarly, for overview of rules, run + +```read.csv(system.file("csv","achilles_rule.csv",package="Achilles"),as.is=T)``` + + - Also see [notes.md](extras/notes.md) for more information (in the extras folder). + +Developers: How to Add or Modify Analyses +================================== + +Please refer to the [README-developers.md file](README-developers.md). + + +Getting Started with Docker +=========================== +This is an alternative method for running Achilles that does not require R and Java installations, using a Docker container instead. + +1. Install [Docker](https://docs.docker.com/installation/) and [Docker Compose](https://docs.docker.com/compose/install/). + +2. Clone this repository with git (`git clone https://github.com/OHDSI/Achilles.git`) and make it your working directory (`cd Achilles`). + +3. Copy `env_vars.sample` to `env_vars` and fill in the variable definitions. The `ACHILLES_DB_URI` should be formatted as `://:@/`. + +4. Copy `docker-compose.yml.sample` to `docker-compose.yml` and fill in the data output directory. + +5. Build the docker image with `docker-compose build`. + +6. Run Achilles in the background with `docker-compose run -d achilles`. + +Alternatively, you can run it with one long command line, like in the following example: + +```bash +docker run \ + --rm \ + --net=host \ + -v "$(pwd)"/output:/opt/app/output \ + -e ACHILLES_SOURCE=DEFAULT \ + -e ACHILLES_DB_URI=postgresql://webapi:webapi@localhost:5432/ohdsi \ + -e ACHILLES_CDM_SCHEMA=cdm5 \ + -e ACHILLES_VOCAB_SCHEMA=cdm5 \ + -e ACHILLES_RES_SCHEMA=webapi \ + -e ACHILLES_CDM_VERSION=5 \ + +``` + +License +======= +Achilles is licensed under Apache License 2.0 + + +# Pre-computations + +Achilles has some compatibility with Data Quality initiatives of the Data Quality Collaborative (DQC; http://repository.edm-forum.org/dqc or GitHub https://github.com/orgs/DQCollaborative). For example, a harmonized set of data quality terms has been published by Khan at al. in 2016. 
+
+What Achilles calls an *analysis* (a pre-computation for a given dataset) is what the DQC terminology calls a *measure*.
+
+Some Heel rules take advantage of derived measures, a feature introduced in Heel version 1.4. A *derived measure* is the result of an SQL query that takes Achilles analyses as input. It is simply a different view of the pre-computations that is worth materializing. The logic for computing derived measures can be viewed in the Heel SQL files in `/inst/sql/sql_server/heels`, which are described further in the [Developers README file](README-developers.md).
+
+An overview of the derived measures can be seen in [this CSV file](inst/csv/heel/heel_results_derived_details.csv).
+
+To allow flexible setting of Achilles Heel rule thresholds in the future, some Heel rules are split into a two-phase approach. First, a derived measure is computed and the result is stored in a separate table, `ACHILLES_RESULTS_DERIVED`. The Heel rule logic then reduces to a simple comparison of whether the derived measure exceeds a threshold. A link between which rules use which pre-computation is available in [this CSV file](inst/csv/heel/heel_rules_all.csv) (previously in `inst/csv/achilles_rule.csv`); see the `linked_measure` column. A small sketch for browsing these rules and derived measures appears at the end of this README.
+
+
+# Heel Rules
+
+Rules are classified into `CDM conformance` rules and `DQ` rules - see the `rule_type` column in [this CSV file](inst/csv/heel/heel_rules_all.csv).
+
+
+Some Heel rules can be generalized to non-OMOP datasets. Other rules are dependent on OMOP concept ids, and a translation of the code to other CDMs would be needed (for example, the rule with `rule_id` of `29` uses an OMOP-specific concept, concept 195075).
+
+Rules that have in their name a prefix `[GeneralPopulationOnly]` are applicable to datasets that represent a general population. Once metadata for this parameter is implemented by OHDSI, their execution can be limited to such datasets. In the meantime, users should ignore the output of rules that are meant for a general population if their dataset is not of that type.
+
+Each rule is also assigned a severity of error, warning, or notification (see the `severity` column).
+
+
+Development
+===========
+Achilles is being developed in RStudio.
+
+### Development status
+[![Build Status](https://travis-ci.org/OHDSI/Achilles.svg?branch=master)](https://travis-ci.org/OHDSI/Achilles)
+[![codecov.io](https://codecov.io/github/OHDSI/Achilles/coverage.svg?branch=master)](https://codecov.io/github/OHDSI/Achilles?branch=master)
+
+
+
+# Acknowledgements
+- This project is supported in part through the National Science Foundation grant IIS 1251151.
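+
+# Browsing Heel Rules and Derived Measures (illustrative)
+
+As referenced above, the sketch below shows one possible way to cross-reference the Heel rules CSV with the `ACHILLES_RESULTS_DERIVED` table. It is not part of the package API; the results schema name and the CSV column-name casing are assumptions you may need to adjust.
+
+```r
+library(Achilles)
+
+# Heel rules shipped with the package (inst/csv/heel/heel_rules_all.csv).
+rules <- read.csv(system.file("csv", "heel", "heel_rules_all.csv", package = "Achilles"),
+                  as.is = TRUE)
+head(rules)  # includes rule_type, severity, and linked_measure columns (casing may differ)
+
+# Derived measures computed by achillesHeel; 'results' stands in for your resultsDatabaseSchema,
+# and connectionDetails is assumed to come from createConnectionDetails() as shown above.
+connection <- DatabaseConnector::connect(connectionDetails)
+derived <- DatabaseConnector::querySql(connection,
+                                       "SELECT * FROM results.achilles_results_derived")
+DatabaseConnector::disconnect(connection)
+```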
diff --git a/data/allReports.rda b/data/allReports.rda deleted file mode 100644 index 838cbadb..00000000 Binary files a/data/allReports.rda and /dev/null differ diff --git a/docker-compose.yml.sample b/docker-compose.yml.sample old mode 100644 new mode 100755 diff --git a/docker-run b/docker-run index 04d2c787..82e8a889 100755 --- a/docker-run +++ b/docker-run @@ -54,8 +54,8 @@ if (length(args) == 0 || args[1] != "heel") { # Run Achilles Heel only achillesHeel( - connectionDetails, - cdmDatabaseSchema=env_vars$ACHILLES_CDM_SCHEMA, + connectionDetails, + cdmDatabaseSchema=env_vars$ACHILLES_CDM_SCHEMA, resultsDatabaseSchema=env_vars$ACHILLES_RES_SCHEMA, vocabDatabaseSchema=env_vars$ACHILLES_VOCAB_SCHEMA, cdmVersion=env_vars$ACHILLES_CDM_VERSION) diff --git a/env_vars.sample b/env_vars.sample old mode 100644 new mode 100755 diff --git a/extras/Heel-Rules.html b/extras/Heel-Rules.html old mode 100644 new mode 100755 diff --git a/extras/PackageMaintenance.R b/extras/PackageMaintenance.R old mode 100644 new mode 100755 index 341c99d9..50fd5e6a --- a/extras/PackageMaintenance.R +++ b/extras/PackageMaintenance.R @@ -4,4 +4,4 @@ connectionDetails$schema=resultsDatabaseSchema conn<-connect(connectionDetails) achilles_analysis<-querySql(conn,'select * from achilles_analysis') #this line caused issue 151: names(achilles_analysis) <- tolower(names(achilles_analysis)) -write.csv(achilles_analysis,file = 'inst/csv/analysisDetails.csv',na = '',row.names = F) +write.csv(achilles_analysis,file = 'inst/csv/achilles/achilles_analysis_details.csv',na = '',row.names = F) diff --git a/extras/Rule-Drill-Down.html b/extras/Rule-Drill-Down.html old mode 100644 new mode 100755 diff --git a/extras/notes.html b/extras/notes.html new file mode 100644 index 00000000..4ae307c5 --- /dev/null +++ b/extras/notes.html @@ -0,0 +1,489 @@ + + + + + + + + + + + + + +notes.utf8 + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+<!-- Rendered HTML export of extras/notes.md (rmarkdown/knitr output); the markup body is omitted here for brevity. Sections covered: Cost tables; How to run Achilles Heel only; How to save time on running full Achilles; Execute only few new analyses; Small maintenance tasks for the package (update CSV overview file for analyses, overview html files); Data Quality CDM (terminology mapping, classification of measures by purpose, output, and terminology/model); By outputted results (stratified analyses, distributions); By nature (general, conformance to data model, data quality specific analyses); Analyzing Heel Results (simple rules, complex rules). -->
+ + + + + + + + diff --git a/extras/notes.md b/extras/notes.md old mode 100644 new mode 100755 index 92bf36b8..ca064319 --- a/extras/notes.md +++ b/extras/notes.md @@ -1,9 +1,5 @@ -#Cost tables -CDM version 5.0 had cost tables that were deleted in version 5.0.1. To avoid errors, use a parameter when calling the achilles function that specifies runCostAnalysis = FALSE - - -#How to run Achilles Heel only: +# How to run Achilles Heel only: Execution of all analyses computations is not necessary if all you want to do is to run new data quality measures in a revised version of Heel. Instead of 10+ hours, you can be done in few minutes with running just heel ``` @@ -20,7 +16,7 @@ Execution of all analyses computations is not necessary if all you want to do is ``` -#How to save time on running full Achilles - make it finish much earlier +# How to save time on running full Achilles - make it finish much earlier If you are willing to skip cost analyses (not used very often), this smaller set of analyses will finish much earlier ``` #get all possible analyses first @@ -43,7 +39,7 @@ achillesResults <- achilles(connectionDetails, cdmDatabaseSchema="cdm5_inst", vocabDatabaseSchema="vocabulary",cdmVersion = "5",analysisIds = subSet1) ``` -#Execute only few new analyses +# Execute only few new analyses Achilles can take a long time to execute. To see new analyses, it is possible to only execute those new analyses. E.g., newly integrated Iris analyses. Use the following code that specifies a set of analysis_id's. The key is to specify which anlayses to run, and to specify createTables to FALSE so that this execution will preserve results previously executed. @@ -59,8 +55,8 @@ achillesResults <- achilles(connectionDetails,cdmDatabaseSchema=cdmDatabaseSchem createTable = F,analysisIds = c(2000,2001)) ``` -#Small maintenance tasks for the package -##update CSV overview file for analyses +# Small maintenance tasks for the package +## update CSV overview file for analyses ```R connectionDetails$schema=resultsDatabaseSchema conn<-connect(connectionDetails) @@ -81,7 +77,7 @@ insertTable(conn,'achilles_rule',achilles_rule) ``` -##overview html files +## overview html files The code below updates html files that show content overview. Use rawgit.com/OHDSI/... to view it nicely. 
```R tempf<-tempfile(pattern = 'temp', fileext = '.Rmd') @@ -100,7 +96,7 @@ rmarkdown::render(tempf,output_file = 'c:/temp/Rule-Drill-Down.html',rmarkdown:: -#Data Quality CDM +# Data Quality CDM These notes relate Achilles and Achilles Heel to Data Quality CDM (DQ CDM) DQM terminology is slightly different @@ -110,43 +106,43 @@ analysis = measure stratum = dimension rule = check -##Classification of measures -###by PURPOSE +## Classification of measures +### by PURPOSE - general purpose measure (% of males) - measure specific for DQ (count of rows with invalid provider_id) -###by OUTPUT +### by OUTPUT - single row measure (count of providers) - multiple rows measure (medium, large, very large) (depends on stratification) -###by TERMINOLOGY/MODEL +### by TERMINOLOGY/MODEL - terminology dependent measure/rule (hysterectomy (using SNOMED (SCT0013513) (Athena CIDs)) (ICD9CM, 10PCS, CPT) - terminology independent measure/rule (eg, at least 1 numerical lab result value in 1000 person sample) - model independent measure/rule (eg, zombie events, prior conception events) -##By outputed results -###Stratified analyses +## By outputed results +### Stratified analyses These anlyses use table ACHILLES_results -###distributions +### distributions Such analyses use table ACHILLES_results_dist e.g., 103,104,105,106,107,203,206,211,403,406,506,511,512,513,514,515,603,606,704,706,715,716,717,803,806,815 -##By nature +## By nature -###general +### general Some analyses are checking data size (and useful in general) -###conformance to data model +### conformance to data model Other analyses have only while others are s -###data quality specific analyses +### data quality specific analyses e.g., analysis_id 7,8,9,207 -#Analyzing Heel Results -###Simple rules: +# Analyzing Heel Results +### Simple rules: There are simple rules that generate a single error or warning. -###Complex rules -However, some rules (e.g., rule_id 6) can generate multiple rows. The true primary key for output is combination of rule_id and analysis_id \ No newline at end of file +### Complex rules +However, some rules (e.g., rule_id 6) can generate multiple rows. 
The true primary key for output is combination of rule_id and analysis_id diff --git a/inst/csv/achilles/achilles_analysis_details.csv b/inst/csv/achilles/achilles_analysis_details.csv new file mode 100644 index 00000000..007fd64e --- /dev/null +++ b/inst/csv/achilles/achilles_analysis_details.csv @@ -0,0 +1,216 @@ +ANALYSIS_ID,DISTRIBUTION,COST,DISTRIBUTED_FIELD,ANALYSIS_NAME,STRATUM_1_NAME,STRATUM_2_NAME,STRATUM_3_NAME,STRATUM_4_NAME,STRATUM_5_NAME +0,-1,0,,Source name,,,,, +1,0,0,,Number of persons,,,,, +2,0,0,,Number of persons by gender,gender_concept_id,,,, +3,0,0,,Number of persons by year of birth,year_of_birth,,,, +4,0,0,,Number of persons by race,race_concept_id,,,, +5,0,0,,Number of persons by ethnicity,ethnicity_concept_id,,,, +7,0,0,,Number of persons with invalid provider_id,,,,, +8,0,0,,Number of persons with invalid location_id,,,,, +9,0,0,,Number of persons with invalid care_site_id,,,,, +10,0,0,,Number of all persons by year of birth by gender,year_of_birth,gender_concept_id,,, +11,0,0,,Number of non-deceased persons by year of birth by gender,year_of_birth,gender_concept_id,,, +12,0,0,,Number of persons by race and ethnicity,race_concept_id,ethnicity_concept_id,,, +101,0,0,,"Number of persons by age, with age at first observation period",age,,,, +102,0,0,,"Number of persons by gender by age, with age at first observation period",gender_concept_id,age,,, +103,1,0,,Distribution of age at first observation period,,,,, +104,1,0,,Distribution of age at first observation period by gender,gender_concept_id,,,, +105,1,0,,Length of observation (days) of first observation period,,,,, +106,1,0,,Length of observation (days) of first observation period by gender,gender_concept_id,,,, +107,1,0,,Length of observation (days) of first observation period by age decile,age decile,,,, +108,0,0,,"Number of persons by length of observation period, in 30d increments",Observation period length 30d increments,,,, +109,0,0,,Number of persons with continuous observation in each year,calendar year,,,, +110,0,0,,Number of persons with continuous observation in each month,calendar month,,,, +111,0,0,,Number of persons by observation period start month,calendar month,,,, +112,0,0,,Number of persons by observation period end month,calendar month,,,, +113,0,0,,Number of persons by number of observation periods,number of observation periods,,,, +114,0,0,,Number of persons with observation period before year-of-birth,,,,, +115,0,0,,Number of persons with observation period end < observation period start,,,,, +116,0,0,,Number of persons with at least one day of observation in each year by gender and age decile,calendar year,gender_concept_id,age decile,, +117,0,0,,Number of persons with at least one day of observation in each month,calendar month,,,, +118,0,0,,Number of observation periods with invalid person_id,,,,, +119,0,0,,Number of observation period records by period_type_concept_id,period_type_concept_id,,,, +200,0,0,,"Number of persons with at least one visit occurrence, by visit_concept_id",visit_concept_id,,,, +201,0,0,,"Number of visit occurrence records, by visit_concept_id",visit_concept_id,,,, +202,0,0,,"Number of persons by visit occurrence start month, by visit_concept_id",visit_concept_id,calendar month,,, +203,1,0,,Number of distinct visit occurrence concepts per person,,,,, +204,0,0,,"Number of persons with at least one visit occurrence, by visit_concept_id by calendar year by gender by age decile",visit_concept_id,calendar year,gender_concept_id,age decile, +206,1,0,,Distribution of age 
by visit_concept_id,visit_concept_id,gender_concept_id,,, +207,0,0,,Number of visit records with invalid person_id,,,,, +208,0,0,,Number of visit records outside valid observation period,,,,, +209,0,0,,Number of visit records with end date < start date,,,,, +210,0,0,,Number of visit records with invalid care_site_id,,,,, +211,1,0,,Distribution of length of stay by visit_concept_id,visit_concept_id,,,, +212,0,0,,"Number of persons with at least one visit occurrence, by calendar year by gender by age decile",calendar year,gender_concept_id,age decile,, +220,0,0,,Number of visit occurrence records by visit occurrence start month,calendar month,,,, +221,0,0,,Number of persons by visit start year,calendar year,,,, +300,0,0,,Number of providers,,,,, +301,0,0,,Number of providers by specialty concept_id,specialty_concept_id,,,, +302,0,0,,Number of providers with invalid care site id,,,,, +400,0,0,,"Number of persons with at least one condition occurrence, by condition_concept_id",condition_concept_id,,,, +401,0,0,,"Number of condition occurrence records, by condition_concept_id",condition_concept_id,,,, +402,0,0,,"Number of persons by condition occurrence start month, by condition_concept_id",condition_concept_id,calendar month,,, +403,1,0,,Number of distinct condition occurrence concepts per person,,,,, +404,0,0,,"Number of persons with at least one condition occurrence, by condition_concept_id by calendar year by gender by age decile",condition_concept_id,calendar year,gender_concept_id,age decile, +405,0,0,,"Number of condition occurrence records, by condition_concept_id by condition_type_concept_id",condition_concept_id,condition_type_concept_id,,, +406,1,0,,Distribution of age by condition_concept_id,condition_concept_id,gender_concept_id,,, +409,0,0,,Number of condition occurrence records with invalid person_id,,,,, +410,0,0,,Number of condition occurrence records outside valid observation period,,,,, +411,0,0,,Number of condition occurrence records with end date < start date,,,,, +412,0,0,,Number of condition occurrence records with invalid provider_id,,,,, +413,0,0,,Number of condition occurrence records with invalid visit_id,,,,, +420,0,0,,Number of condition occurrence records by condition occurrence start month,calendar month,,,, +500,0,0,,"Number of persons with death, by cause_concept_id",cause_concept_id,,,, +501,0,0,,"Number of records of death, by cause_concept_id",cause_concept_id,,,, +502,0,0,,Number of persons by death month,calendar month,,,, +504,0,0,,"Number of persons with a death, by calendar year by gender by age decile",calendar year,gender_concept_id,age decile,, +505,0,0,,"Number of death records, by death_type_concept_id",death_type_concept_id,,,, +506,1,0,,Distribution of age at death by gender,gender_concept_id,,,, +509,0,0,,Number of death records with invalid person_id,,,,, +510,0,0,,Number of death records outside valid observation period,,,,, +511,1,0,,Distribution of time from death to last condition,,,,, +512,1,0,,Distribution of time from death to last drug,,,,, +513,1,0,,Distribution of time from death to last visit,,,,, +514,1,0,,Distribution of time from death to last procedure,,,,, +515,1,0,,Distribution of time from death to last observation,,,,, +600,0,0,,"Number of persons with at least one procedure occurrence, by procedure_concept_id",procedure_concept_id,,,, +601,0,0,,"Number of procedure occurrence records, by procedure_concept_id",procedure_concept_id,,,, +602,0,0,,"Number of persons by procedure occurrence start month, by 
procedure_concept_id",procedure_concept_id,calendar month,,, +603,1,0,,Number of distinct procedure occurrence concepts per person,,,,, +604,0,0,,"Number of persons with at least one procedure occurrence, by procedure_concept_id by calendar year by gender by age decile",procedure_concept_id,calendar year,gender_concept_id,age decile, +605,0,0,,"Number of procedure occurrence records, by procedure_concept_id by procedure_type_concept_id",procedure_concept_id,procedure_type_concept_id,,, +606,1,0,,Distribution of age by procedure_concept_id,procedure_concept_id,gender_concept_id,,, +609,0,0,,Number of procedure occurrence records with invalid person_id,,,,, +610,0,0,,Number of procedure occurrence records outside valid observation period,,,,, +612,0,0,,Number of procedure occurrence records with invalid provider_id,,,,, +613,0,0,,Number of procedure occurrence records with invalid visit_id,,,,, +620,0,0,,Number of procedure occurrence records by procedure occurrence start month,calendar month,,,, +691,0,0,,Percentage of total persons that have at least x procedures,procedure_concept_id,procedure_person,,, +700,0,0,,"Number of persons with at least one drug exposure, by drug_concept_id",drug_concept_id,,,, +701,0,0,,"Number of drug exposure records, by drug_concept_id",drug_concept_id,,,, +702,0,0,,"Number of persons by drug exposure start month, by drug_concept_id",drug_concept_id,calendar month,,, +703,1,0,,Number of distinct drug exposure concepts per person,,,,, +704,0,0,,"Number of persons with at least one drug exposure, by drug_concept_id by calendar year by gender by age decile",drug_concept_id,calendar year,gender_concept_id,age decile, +705,0,0,,"Number of drug exposure records, by drug_concept_id by drug_type_concept_id",drug_concept_id,drug_type_concept_id,,, +706,1,0,,Distribution of age by drug_concept_id,drug_concept_id,gender_concept_id,,, +709,0,0,,Number of drug exposure records with invalid person_id,,,,, +710,0,0,,Number of drug exposure records outside valid observation period,,,,, +711,0,0,,Number of drug exposure records with end date < start date,,,,, +712,0,0,,Number of drug exposure records with invalid provider_id,,,,, +713,0,0,,Number of drug exposure records with invalid visit_id,,,,, +715,1,0,,Distribution of days_supply by drug_concept_id,drug_concept_id,,,, +716,1,0,,Distribution of refills by drug_concept_id,drug_concept_id,,,, +717,1,0,,Distribution of quantity by drug_concept_id,drug_concept_id,,,, +720,0,0,,Number of drug exposure records by drug exposure start month,calendar month,,,, +791,0,0,,Percentage of total persons that have at least x drug exposures,drug_concept_id,drug_person,,, +800,0,0,,"Number of persons with at least one observation occurrence, by observation_concept_id",observation_concept_id,,,, +801,0,0,,"Number of observation occurrence records, by observation_concept_id",observation_concept_id,,,, +802,0,0,,"Number of persons by observation occurrence start month, by observation_concept_id",observation_concept_id,calendar month,,, +803,1,0,,Number of distinct observation occurrence concepts per person,,,,, +804,0,0,,"Number of persons with at least one observation occurrence, by observation_concept_id by calendar year by gender by age decile",observation_concept_id,calendar year,gender_concept_id,age decile, +805,0,0,,"Number of observation occurrence records, by observation_concept_id by observation_type_concept_id",observation_concept_id,observation_type_concept_id,,, +806,1,0,,Distribution of age by 
observation_concept_id,observation_concept_id,gender_concept_id,,, +807,0,0,,"Number of observation occurrence records, by observation_concept_id and unit_concept_id",observation_concept_id,unit_concept_id,,, +809,0,0,,Number of observation records with invalid person_id,,,,, +810,0,0,,Number of observation records outside valid observation period,,,,, +812,0,0,,Number of observation records with invalid provider_id,,,,, +813,0,0,,Number of observation records with invalid visit_id,,,,, +814,0,0,,"Number of observation records with no value (numeric, string, or concept)",,,,, +815,1,0,,"Distribution of numeric values, by observation_concept_id and unit_concept_id",,,,, +820,0,0,,Number of observation records by observation start month,calendar month,,,, +891,0,0,,Percentage of total persons that have at least x observations,observation_concept_id,observation_person,,, +900,0,0,,"Number of persons with at least one drug era, by drug_concept_id",drug_concept_id,,,, +901,0,0,,"Number of drug era records, by drug_concept_id",drug_concept_id,,,, +902,0,0,,"Number of persons by drug era start month, by drug_concept_id",drug_concept_id,calendar month,,, +903,1,0,,Number of distinct drug era concepts per person,,,,, +904,0,0,,"Number of persons with at least one drug era, by drug_concept_id by calendar year by gender by age decile",drug_concept_id,calendar year,gender_concept_id,age decile, +906,1,0,,Distribution of age by drug_concept_id,drug_concept_id,gender_concept_id,,, +907,1,0,,"Distribution of drug era length, by drug_concept_id",drug_concept_id,,,, +908,0,0,,Number of drug eras without valid person,,,,, +909,0,0,,Number of drug eras outside valid observation period,,,,, +910,0,0,,Number of drug eras with end date < start date,,,,, +920,0,0,,Number of drug era records by drug era start month,calendar month,,,, +1000,0,0,,"Number of persons with at least one condition era, by condition_concept_id",condition_concept_id,,,, +1001,0,0,,"Number of condition era records, by condition_concept_id",condition_concept_id,,,, +1002,0,0,,"Number of persons by condition era start month, by condition_concept_id",condition_concept_id,calendar month,,, +1003,1,0,,Number of distinct condition era concepts per person,,,,, +1004,0,0,,"Number of persons with at least one condition era, by condition_concept_id by calendar year by gender by age decile",condition_concept_id,calendar year,gender_concept_id,age decile, +1006,1,0,,Distribution of age by condition_concept_id,condition_concept_id,gender_concept_id,,, +1007,1,0,,"Distribution of condition era length, by condition_concept_id",condition_concept_id,,,, +1008,0,0,,Number of condition eras without valid person,,,,, +1009,0,0,,Number of condition eras outside valid observation period,,,,, +1010,0,0,,Number of condition eras with end date < start date,,,,, +1020,0,0,,Number of condition era records by condition era start month,calendar month,,,, +1100,0,0,,Number of persons by location 3-digit zip,3-digit zip,,,, +1101,0,0,,Number of persons by location state,state,,,, +1102,0,0,,Number of care sites by location 3-digit zip,3-digit zip,,,, +1103,0,0,,Number of care sites by location state,state,,,, +1200,0,0,,Number of persons by place of service,place_of_service_concept_id,,,, +1201,0,0,,Number of visits by place of service,place_of_service_concept_id,,,, +1202,0,0,,Number of care sites by place of service,place_of_service_concept_id,,,, +1406,1,0,,Length of payer plan (days) of first payer plan period by gender,gender_concept_id,,,, +1407,1,0,,Length of 
payer plan (days) of first payer plan period by age decile,age_decile,,,, +1408,0,0,,"Number of persons by length of payer plan period, in 30d increments",payer plan period length 30d increments,,,, +1409,0,0,,Number of persons with continuous payer plan in each year,calendar year,,,, +1410,0,0,,Number of persons with continuous payer plan in each month,calendar month,,,, +1411,0,0,,Number of persons by payer plan period start month,calendar month,,,, +1412,0,0,,Number of persons by payer plan period end month,calendar month,,,, +1413,0,0,,Number of persons by number of payer plan periods,number of payer plan periods,,,, +1414,0,0,,Number of persons with payer plan period before year-of-birth,,,,, +1415,0,0,,Number of persons with payer plan period end < payer plan period start,,,,, +1500,0,1,,Number of drug cost records with invalid drug exposure id,,,,, +1501,0,1,,Number of drug cost records with invalid payer plan period id,,,,, +1502,1,1,paid_copay,"Distribution of paid copay, by drug_concept_id",drug_concept_id,,,, +1503,1,1,paid_coinsurance,"Distribution of paid coinsurance, by drug_concept_id",drug_concept_id,,,, +1504,1,1,paid_toward_deductible,"Distribution of paid toward deductible, by drug_concept_id",drug_concept_id,,,, +1505,1,1,paid_by_payer,"Distribution of paid by payer, by drug_concept_id",drug_concept_id,,,, +1506,1,1,paid_by_coordination_benefits,"Distribution of paid by coordination of benefit, by drug_concept_id",drug_concept_id,,,, +1507,1,1,total_out_of_pocket,"Distribution of total out-of-pocket, by drug_concept_id",drug_concept_id,,,, +1508,1,1,total_paid,"Distribution of total paid, by drug_concept_id",drug_concept_id,,,, +1509,1,1,ingredient_cost,"Distribution of ingredient_cost, by drug_concept_id",drug_concept_id,,,, +1510,1,1,dispensing_fee,"Distribution of dispensing fee, by drug_concept_id",drug_concept_id,,,, +1511,1,1,average_wholesale_price,"Distribution of average wholesale price, by drug_concept_id",drug_concept_id,,,, +1600,0,1,,Number of procedure cost records with invalid procedure occurrence id,,,,, +1601,0,1,,Number of procedure cost records with invalid payer plan period id,,,,, +1602,1,1,paid_copay,"Distribution of paid copay, by procedure_concept_id",procedure_concept_id,,,, +1603,1,1,paid_coinsurance,"Distribution of paid coinsurance, by procedure_concept_id",procedure_concept_id,,,, +1604,1,1,paid_toward_deductible,"Distribution of paid toward deductible, by procedure_concept_id",procedure_concept_id,,,, +1605,1,1,paid_by_payer,"Distribution of paid by payer, by procedure_concept_id",procedure_concept_id,,,, +1606,1,1,paid_by_coordination_benefits,"Distribution of paid by coordination of benefit, by procedure_concept_id",procedure_concept_id,,,, +1607,1,1,total_out_of_pocket,"Distribution of total out-of-pocket, by procedure_concept_id",procedure_concept_id,,,, +1608,1,1,total_paid,"Distribution of total paid, by procedure_concept_id",procedure_concept_id,,,, +1610,0,1,,Number of records by revenue_code_concept_id,revenue_code_concept_id,,,, +1700,0,0,,Number of records by cohort_concept_id,cohort_concept_id,,,, +1701,0,0,,Number of records with cohort end date < cohort start date,,,,, +1800,0,0,,"Number of persons with at least one measurement occurrence, by measurement_concept_id",measurement_concept_id,,,, +1801,0,0,,"Number of measurement occurrence records, by measurement_concept_id",measurement_concept_id,,,, +1802,0,0,,"Number of persons by measurement occurrence start month, by measurement_concept_id",measurement_concept_id,calendar 
month,,, +1803,1,0,,Number of distinct mesurement occurrence concepts per person,,,,, +1804,0,0,,"Number of persons with at least one mesurement occurrence, by measurement_concept_id by calendar year by gender by age decile",measurement_concept_id,calendar year,gender_concept_id,age decile, +1805,0,0,,"Number of measurement occurrence records, by measurement_concept_id by measurement_type_concept_id",measurement_concept_id,measurement_type_concept_id,,, +1806,1,0,,Distribution of age by measurement_concept_id,measurement_concept_id,gender_concept_id,,, +1807,0,0,,"Number of measurement occurrence records, by measurement_concept_id and unit_concept_id",measurement_concept_id,unit_concept_id,,, +1809,0,0,,Number of measurement records with invalid person_id,,,,, +1810,0,0,,Number of measurement records outside valid observation period,,,,, +1812,0,0,,Number of measurement records with invalid provider_id,,,,, +1813,0,0,,Number of measurement records with invalid visit_id,,,,, +1814,0,0,,"Number of measurement records with no value (numeric, string, or concept)",,,,, +1815,1,0,,"Distribution of numeric values, by measurement_concept_id and unit_concept_id",,,,, +1816,1,0,,"Distribution of low range, by measurement_concept_id and unit_concept_id",,,,, +1817,1,0,,"Distribution of high range, by observation_concept_id and unit_concept_id",,,,, +1818,0,0,,"Number of measurement records below/within/above normal range, by measurement_concept_id and unit_concept_id",,,,, +1820,0,0,,Number of measurement records by measurement start month,calendar month,,,, +1821,0,0,,Number of measurement records with no numeric value,,,,, +1891,0,0,,Percentage of total persons that have at least x measurements,measurement_concept_id,measurement_person,,, +1900,0,0,,"Source values mapped to concept_id 0 by table, by source_value",table_name,source_value,,, +2000,0,0,,Number of patients with at least 1 Dx and 1 Rx,,,,, +2001,0,0,,Number of patients with at least 1 Dx and 1 Proc,,,,, +2002,0,0,,"Number of patients with at least 1 Meas, 1 Dx and 1 Rx",,,,, +2003,0,0,,Number of patients with at least 1 Visit,,,,, +2100,0,0,,"Number of persons with at least one device exposure, by device_concept_id",device_concept_id,,,, +2101,0,0,,"Number of device exposure records, by device_concept_id",device_concept_id,,,, +2102,0,0,,"Number of persons by device records start month, by device_concept_id",device_concept_id,calendar month,,, +2104,0,0,,"Number of persons with at least one device exposure, by device_concept_id by calendar year by gender by age decile",device_concept_id,calendar year,gender_concept_id,age decile, +2105,0,0,,"Number of device exposure records, by device_concept_id by device_type_concept_id",device_concept_id,device_type_concept_id,,, +2200,0,0,,Number of persons with at least one note by note_type_concept_id,note_type_concept_id,,,, +2201,0,0,,"Number of note records, by note_type_concept_id",note_type_concept_id,,,, diff --git a/inst/csv/achilles/achilles_cost_columns.csv b/inst/csv/achilles/achilles_cost_columns.csv new file mode 100644 index 00000000..fdf3bc85 --- /dev/null +++ b/inst/csv/achilles/achilles_cost_columns.csv @@ -0,0 +1,18 @@ +DOMAIN_ID,OLD,CURRENT +Drug,paid_copay,paid_patient_copay +Drug,paid_coinsurance,paid_patient_coinsurance +Drug,paid_toward_deductible,paid_patient_deductible +Drug,paid_by_payer,paid_by_payer +Drug,paid_by_coordination_benefits,paid_by_primary +Drug,total_out_of_pocket,paid_by_patient +Drug,total_paid,total_paid +Drug,ingredient_cost,paid_ingredient_cost 
+Drug,dispensing_fee,paid_dispensing_fee +Drug,average_wholesale_price,total_cost +Procedure,paid_copay,paid_patient_copay +Procedure,paid_coinsurance,paid_patient_coinsurance +Procedure,paid_toward_deductible,paid_patient_deductible +Procedure,paid_by_payer,paid_by_payer +Procedure,paid_by_coordination_benefits,paid_by_primary +Procedure,total_out_of_pocket,paid_by_patient +Procedure,total_paid,total_paid \ No newline at end of file diff --git a/inst/csv/achilles_rule.csv b/inst/csv/achilles_rule.csv deleted file mode 100644 index 16bf0636..00000000 --- a/inst/csv/achilles_rule.csv +++ /dev/null @@ -1,46 +0,0 @@ -rule_id,rule_name,severity,rule_type,rule_description,threshold,rule_classification,rule_scope,linked_measure -0,Achilles Heel version 1.5,,,this rule is not used for data analysis. It communicates the version of the ruleset.,,,, -1,multiple checks for greater than zero,error,DQ,umbrella rule: this rule includes multiple error checks on over 35 analysis_ids,>0,complex,, -2,multiple checks where minimum value of a measure should not be negative,error,DQ,umbrella rule: this rule includes multiple error checks on over 20 analysis_ids where min value in distribution should not be negative,,complex,, -3,multiple checks related to death data where maximum value of a measure should not be positive,warning,DQ,death distributions where max should not be positive (using anlyses 511;512;513;514;515),,plausibility,, -4,invalid concept_id,error,CDM conformance,invalid concept_id,,,, -5,invalid type concept_id,error,CDM conformance,invalid type concept_id,,,, -6,data with unmapped concepts,warning,DQ,for multiple analyses,,,, -7,concept from the wrong vocabulary,error,CDM conformance,concept from the wrong vocabulary,,,,2 -8,concept from the wrong vocabulary; race,error,CDM conformance,concept from the wrong vocabulary; race,,,,4 -9,concept from the wrong vocabulary; ethnicity,error,CDM conformance,concept from the wrong vocabulary; ethnicity,,,,5 -10,concept from the wrong vocabulary; place of service,error,CDM conformance,concept from the wrong vocabulary; place of service,,,,202 -11,incorrect terminology,error,CDM conformance,specialty - 48 specialty,,,,301 -12,Dx is not a SNOMED code,error,CDM conformance,"concept from the wrong vocabulary; Condition Occurrence, Condition Era (SNOMED)",,,,400;1000 -13,Drug is not RxNorm concept,error,CDM conformance,"concept from the wrong vocabulary; Drug Exposure, Drug Era (RxNorm)",,,,700;900 -14,"Procedure is not CPT, ICD9Proc or HCPCS",error,CDM conformance,procedure - 4 CPT4/5 HCPCS/3 ICD9P,,,,600 -15,incorrect terminology,error,CDM conformance,CDM V4 only:LOINC,,,, -16,incorrect terminology,error,CDM conformance,CDM v4 only:DRG,,,, -17,incorrect terminology,error,CDM conformance,revenue code - 43 revenue code,,,,1610 -18,year of birth is in the future,error,DQ,year of birth should not be in the future ,,plausibility,,3 -19,year of birth is prior 1800,warning,DQ, year of birth < 1800,<1800,plausibility,,3 -20,age below 0,error,DQ,age < 0,<0,plausibility,,101 -21,age too high,error,DQ,age > 150,>150,plausibility,,101 -22,monthly trend,warning,DQ,monthly change > 100%,,fidelity,, -23,monthly trend,warning,DQ,monthly change > 100% at concept level,,fidelity,, -24,too high days_supply,warning,DQ,days_supply > 180,,plausibility,, -25,too high number of refils,warning,DQ,refills > 10,>10,plausibility,,716 -26,implausible quantity for drug,warning,DQ,quantity > 600,>600,plausibility,,717 -27,more than 1 percent of unmapped rows (concept_0 
rows),warning,DQ,for multiple analyses (4xx;6xx;7xx;8xx;18xx),>1,completeness,,UnmappedData:byDomain:Percentage -28,percentage of non-numerical measurement records exceeds general population threshold,warning,DQ,"typically, measurement data contans a significant proportion of rows with numerical result. This rule looks at rows in MEASUREMENT and alerts the user if a large proportion of rows lack any numerical result",>=80,completeness,GeneralPopulationOnly,Meas:NoNumValue:Percentage -29,infant diagnosis at senior age of over 50yo,error,DQ,mecconium condition 195075; This rule is example of a terminology depended data quality tool,,plausibility,,404 -31,ratio of providers to total patients,notification,DQ,This rules fires if data indicate a high number of patients and only a few providers exist. ,,plausibility,,Provider:PatientProviderRatio -32,Percentage of patients with no visits exceeds threshold,notification,DQ,checks if the percentage of patients with no visits exceeds threshold,>5,plausibility,,ach_2003:Percentage -33,[GeneralPopulationOnly] Not all deciles represented at first observation,notification,DQ,"in a general population, a database would observe first visit across all age groups. We at least expect deciles 0 to 8. Rule looks at the count of deciles.",<9,completeness,GeneralPopulationOnly,AgeAtFirstObsByDecile:DecileCnt -34,Count of unmapped source values in a domain exceeds threshold,notification,DQ,"looks at values that are mapped to concept0 and their source values by table, rule 6 is related to this rule but it does not look at the size of the problem (only if unmapped data are present or not present)",,completeness,,UnmappedDataByDomain:SourceValueCnt -35,Count of measurement_ids with more than 5 distinct units exceeds threshold,notification,DQ,"Idealy, each measurement would use only one unit. For example, kg for weight. This rule notifies the user if database has measurements that have 5 or more units. This rule has technically thresholds. 
",>=5;>=10,fidelity,,1807 -36,age too high2,warning,DQ,age > 125; same as rule 21 but as warning flavor; same threshold is used by Sentinel DQA,>125,plausibility:temporal,,101 -37,Notes data density,notification,DQ,"This rule is measuring data density on visit level (in addition to patient and dataset level); Assumption is that at least one data event (e.g., diagnosis, note) is generated for each visit; This rule is testing that at least some notes exist (considering the number of visits); for datasets with zero notes the derived measure is null and rule does not fire at all; current rule is on overall data density (for notes only) per visit level",<0.01,completeness,,Note:NoteVisitRatio -38,Provider Specialty data density,notification,DQ,"in a general dataset, it is expected that more than providers with a wide range of specialties (at least more than just one specialty) is present; notification may indicate that provider table is missing data on specialty,typical datat has at least 28 specialties present in provider table",<2,completeness,GeneralPopulationOnly,Provider:SpecialtyCnt -39,Too high Born to Deceased Ratio in some years,notification,DQ,"Given lifetime record DQ assumption if more than 30k patients is born for every deceased patient the dataset may not be recording complete records for all senior patients in that year -",>30000,completeness,GeneralPopulationOnly,Death:BornDeceasedRatio -40,Death event outside observation period,error,DQ,death event should not be outside observation period; this rule was if previous versions subsumed in umbrella rule,,completeness,,510 -41,No weight data in MEASUREMENT table,notification,DQ,implementation of similar Sentinel rule for certain vital signs; rule lukes at concept_id 3025315 (LOINC code 29463-7)),,completeness,,1800 -42,Percentage of outpatient visits is too low,notification,DQ,"Rule is looking at percentage of outpatient visits. If this measure is too low (e.g. 5 percent), it may indicate a predominantly inpatient dataset. Threshold was decided on DQ-Study 2. General population only rule.",<0.42,completeness,GeneralPopulationOnly,201 -43,99+ percent of persons have exactly one observation period,notification,DQ,Some datasets cannot provide observation period data based on health insurance start and stop dates. Rule notifies a user if 99+% of patients have exactly one observation period. ,>=99.0,completeness,,113 -44,"Percentage of patients with at least 1 Measurement, 1 Dx and 1 Rx is below threshold",notification,DQ,This notification may indicate that a significant percentage of patients is missing data for either Measurement or Diagnosis or Medication. Many clinical studies may want to require at least some data in all three domains. 
Threshold was decided empirically in OHDSI DQ Study ,,completeness,,2002 diff --git a/inst/csv/analysisDetails.csv b/inst/csv/analysisDetails.csv deleted file mode 100644 index e799dbab..00000000 --- a/inst/csv/analysisDetails.csv +++ /dev/null @@ -1,216 +0,0 @@ -"ANALYSIS_ID","ANALYSIS_NAME","STRATUM_1_NAME","STRATUM_2_NAME","STRATUM_3_NAME","STRATUM_4_NAME","STRATUM_5_NAME" -0,"Source name",,,,, -1,"Number of persons",,,,, -2,"Number of persons by gender","gender_concept_id",,,, -3,"Number of persons by year of birth","year_of_birth",,,, -4,"Number of persons by race","race_concept_id",,,, -5,"Number of persons by ethnicity","ethnicity_concept_id",,,, -7,"Number of persons with invalid provider_id",,,,, -8,"Number of persons with invalid location_id",,,,, -9,"Number of persons with invalid care_site_id",,,,, -10,"Number of all persons by year of birth by gender","year_of_birth","gender_concept_id",,, -11,"Number of non-deceased persons by year of birth by gender","year_of_birth","gender_concept_id",,, -12,"Number of persons by race and ethnicity","race_concept_id","ethnicity_concept_id",,, -101,"Number of persons by age, with age at first observation period","age",,,, -102,"Number of persons by gender by age, with age at first observation period","gender_concept_id","age",,, -103,"Distribution of age at first observation period",,,,, -104,"Distribution of age at first observation period by gender","gender_concept_id",,,, -105,"Length of observation (days) of first observation period",,,,, -106,"Length of observation (days) of first observation period by gender","gender_concept_id",,,, -107,"Length of observation (days) of first observation period by age decile","age decile",,,, -108,"Number of persons by length of observation period, in 30d increments","Observation period length 30d increments",,,, -109,"Number of persons with continuous observation in each year","calendar year",,,, -110,"Number of persons with continuous observation in each month","calendar month",,,, -111,"Number of persons by observation period start month","calendar month",,,, -112,"Number of persons by observation period end month","calendar month",,,, -113,"Number of persons by number of observation periods","number of observation periods",,,, -114,"Number of persons with observation period before year-of-birth",,,,, -115,"Number of persons with observation period end < observation period start",,,,, -116,"Number of persons with at least one day of observation in each year by gender and age decile","calendar year","gender_concept_id","age decile",, -117,"Number of persons with at least one day of observation in each month","calendar month",,,, -118,"Number of observation periods with invalid person_id",,,,, -119,"Number of observation period records by period_type_concept_id","period_type_concept_id",,,, -200,"Number of persons with at least one visit occurrence, by visit_concept_id","visit_concept_id",,,, -201,"Number of visit occurrence records, by visit_concept_id","visit_concept_id",,,, -202,"Number of persons by visit occurrence start month, by visit_concept_id","visit_concept_id","calendar month",,, -203,"Number of distinct visit occurrence concepts per person",,,,, -204,"Number of persons with at least one visit occurrence, by visit_concept_id by calendar year by gender by age decile","visit_concept_id","calendar year","gender_concept_id","age decile", -206,"Distribution of age by visit_concept_id","visit_concept_id","gender_concept_id",,, -207,"Number of visit records with invalid person_id",,,,, 
-208,"Number of visit records outside valid observation period",,,,, -209,"Number of visit records with end date < start date",,,,, -210,"Number of visit records with invalid care_site_id",,,,, -211,"Distribution of length of stay by visit_concept_id","visit_concept_id",,,, -212,"Number of persons with at least one visit occurrence, by calendar year by gender by age decile","calendar year","gender_concept_id","age decile",, -220,"Number of visit occurrence records by visit occurrence start month","calendar month",,,, -221,"Number of persons by visit start year","calendar year",,,, -300,"Number of providers",,,,, -301,"Number of providers by specialty concept_id","specialty_concept_id",,,, -302,"Number of providers with invalid care site id",,,,, -400,"Number of persons with at least one condition occurrence, by condition_concept_id","condition_concept_id",,,, -401,"Number of condition occurrence records, by condition_concept_id","condition_concept_id",,,, -402,"Number of persons by condition occurrence start month, by condition_concept_id","condition_concept_id","calendar month",,, -403,"Number of distinct condition occurrence concepts per person",,,,, -404,"Number of persons with at least one condition occurrence, by condition_concept_id by calendar year by gender by age decile","condition_concept_id","calendar year","gender_concept_id","age decile", -405,"Number of condition occurrence records, by condition_concept_id by condition_type_concept_id","condition_concept_id","condition_type_concept_id",,, -406,"Distribution of age by condition_concept_id","condition_concept_id","gender_concept_id",,, -409,"Number of condition occurrence records with invalid person_id",,,,, -410,"Number of condition occurrence records outside valid observation period",,,,, -411,"Number of condition occurrence records with end date < start date",,,,, -412,"Number of condition occurrence records with invalid provider_id",,,,, -413,"Number of condition occurrence records with invalid visit_id",,,,, -420,"Number of condition occurrence records by condition occurrence start month","calendar month",,,, -500,"Number of persons with death, by cause_concept_id","cause_concept_id",,,, -501,"Number of records of death, by cause_concept_id","cause_concept_id",,,, -502,"Number of persons by death month","calendar month",,,, -504,"Number of persons with a death, by calendar year by gender by age decile","calendar year","gender_concept_id","age decile",, -505,"Number of death records, by death_type_concept_id","death_type_concept_id",,,, -506,"Distribution of age at death by gender","gender_concept_id",,,, -509,"Number of death records with invalid person_id",,,,, -510,"Number of death records outside valid observation period",,,,, -511,"Distribution of time from death to last condition",,,,, -512,"Distribution of time from death to last drug",,,,, -513,"Distribution of time from death to last visit",,,,, -514,"Distribution of time from death to last procedure",,,,, -515,"Distribution of time from death to last observation",,,,, -600,"Number of persons with at least one procedure occurrence, by procedure_concept_id","procedure_concept_id",,,, -601,"Number of procedure occurrence records, by procedure_concept_id","procedure_concept_id",,,, -602,"Number of persons by procedure occurrence start month, by procedure_concept_id","procedure_concept_id","calendar month",,, -603,"Number of distinct procedure occurrence concepts per person",,,,, -604,"Number of persons with at least one procedure occurrence, by procedure_concept_id by 
calendar year by gender by age decile","procedure_concept_id","calendar year","gender_concept_id","age decile", -605,"Number of procedure occurrence records, by procedure_concept_id by procedure_type_concept_id","procedure_concept_id","procedure_type_concept_id",,, -606,"Distribution of age by procedure_concept_id","procedure_concept_id","gender_concept_id",,, -609,"Number of procedure occurrence records with invalid person_id",,,,, -610,"Number of procedure occurrence records outside valid observation period",,,,, -612,"Number of procedure occurrence records with invalid provider_id",,,,, -613,"Number of procedure occurrence records with invalid visit_id",,,,, -620,"Number of procedure occurrence records by procedure occurrence start month","calendar month",,,, -691,"Percentage of total persons that have at least x procedures","procedure_concept_id","procedure_person",,, -700,"Number of persons with at least one drug exposure, by drug_concept_id","drug_concept_id",,,, -701,"Number of drug exposure records, by drug_concept_id","drug_concept_id",,,, -702,"Number of persons by drug exposure start month, by drug_concept_id","drug_concept_id","calendar month",,, -703,"Number of distinct drug exposure concepts per person",,,,, -704,"Number of persons with at least one drug exposure, by drug_concept_id by calendar year by gender by age decile","drug_concept_id","calendar year","gender_concept_id","age decile", -705,"Number of drug exposure records, by drug_concept_id by drug_type_concept_id","drug_concept_id","drug_type_concept_id",,, -706,"Distribution of age by drug_concept_id","drug_concept_id","gender_concept_id",,, -709,"Number of drug exposure records with invalid person_id",,,,, -710,"Number of drug exposure records outside valid observation period",,,,, -711,"Number of drug exposure records with end date < start date",,,,, -712,"Number of drug exposure records with invalid provider_id",,,,, -713,"Number of drug exposure records with invalid visit_id",,,,, -715,"Distribution of days_supply by drug_concept_id","drug_concept_id",,,, -716,"Distribution of refills by drug_concept_id","drug_concept_id",,,, -717,"Distribution of quantity by drug_concept_id","drug_concept_id",,,, -720,"Number of drug exposure records by drug exposure start month","calendar month",,,, -791,"Percentage of total persons that have at least x drug exposures","drug_concept_id","drug_person",,, -800,"Number of persons with at least one observation occurrence, by observation_concept_id","observation_concept_id",,,, -801,"Number of observation occurrence records, by observation_concept_id","observation_concept_id",,,, -802,"Number of persons by observation occurrence start month, by observation_concept_id","observation_concept_id","calendar month",,, -803,"Number of distinct observation occurrence concepts per person",,,,, -804,"Number of persons with at least one observation occurrence, by observation_concept_id by calendar year by gender by age decile","observation_concept_id","calendar year","gender_concept_id","age decile", -805,"Number of observation occurrence records, by observation_concept_id by observation_type_concept_id","observation_concept_id","observation_type_concept_id",,, -806,"Distribution of age by observation_concept_id","observation_concept_id","gender_concept_id",,, -807,"Number of observation occurrence records, by observation_concept_id and unit_concept_id","observation_concept_id","unit_concept_id",,, -809,"Number of observation records with invalid person_id",,,,, -810,"Number of observation 
records outside valid observation period",,,,, -812,"Number of observation records with invalid provider_id",,,,, -813,"Number of observation records with invalid visit_id",,,,, -814,"Number of observation records with no value (numeric, string, or concept)",,,,, -815,"Distribution of numeric values, by observation_concept_id and unit_concept_id",,,,, -820,"Number of observation records by observation start month","calendar month",,,, -891,"Percentage of total persons that have at least x observations","observation_concept_id","observation_person",,, -900,"Number of persons with at least one drug era, by drug_concept_id","drug_concept_id",,,, -901,"Number of drug era records, by drug_concept_id","drug_concept_id",,,, -902,"Number of persons by drug era start month, by drug_concept_id","drug_concept_id","calendar month",,, -903,"Number of distinct drug era concepts per person",,,,, -904,"Number of persons with at least one drug era, by drug_concept_id by calendar year by gender by age decile","drug_concept_id","calendar year","gender_concept_id","age decile", -906,"Distribution of age by drug_concept_id","drug_concept_id","gender_concept_id",,, -907,"Distribution of drug era length, by drug_concept_id","drug_concept_id",,,, -908,"Number of drug eras without valid person",,,,, -909,"Number of drug eras outside valid observation period",,,,, -910,"Number of drug eras with end date < start date",,,,, -920,"Number of drug era records by drug era start month","calendar month",,,, -1000,"Number of persons with at least one condition era, by condition_concept_id","condition_concept_id",,,, -1001,"Number of condition era records, by condition_concept_id","condition_concept_id",,,, -1002,"Number of persons by condition era start month, by condition_concept_id","condition_concept_id","calendar month",,, -1003,"Number of distinct condition era concepts per person",,,,, -1004,"Number of persons with at least one condition era, by condition_concept_id by calendar year by gender by age decile","condition_concept_id","calendar year","gender_concept_id","age decile", -1006,"Distribution of age by condition_concept_id","condition_concept_id","gender_concept_id",,, -1007,"Distribution of condition era length, by condition_concept_id","condition_concept_id",,,, -1008,"Number of condition eras without valid person",,,,, -1009,"Number of condition eras outside valid observation period",,,,, -1010,"Number of condition eras with end date < start date",,,,, -1020,"Number of condition era records by condition era start month","calendar month",,,, -1100,"Number of persons by location 3-digit zip","3-digit zip",,,, -1101,"Number of persons by location state","state",,,, -1102,"Number of care sites by location 3-digit zip","3-digit zip",,,, -1103,"Number of care sites by location state","state",,,, -1200,"Number of persons by place of service","place_of_service_concept_id",,,, -1201,"Number of visits by place of service","place_of_service_concept_id",,,, -1202,"Number of care sites by place of service","place_of_service_concept_id",,,, -1406,"Length of payer plan (days) of first payer plan period by gender","gender_concept_id",,,, -1407,"Length of payer plan (days) of first payer plan period by age decile","age_decile",,,, -1408,"Number of persons by length of payer plan period, in 30d increments","payer plan period length 30d increments",,,, -1409,"Number of persons with continuous payer plan in each year","calendar year",,,, -1410,"Number of persons with continuous payer plan in each month","calendar month",,,, 
-1411,"Number of persons by payer plan period start month","calendar month",,,, -1412,"Number of persons by payer plan period end month","calendar month",,,, -1413,"Number of persons by number of payer plan periods","number of payer plan periods",,,, -1414,"Number of persons with payer plan period before year-of-birth",,,,, -1415,"Number of persons with payer plan period end < payer plan period start",,,,, -1500,"Number of drug cost records with invalid drug exposure id",,,,, -1501,"Number of drug cost records with invalid payer plan period id",,,,, -1502,"Distribution of paid copay, by drug_concept_id","drug_concept_id",,,, -1503,"Distribution of paid coinsurance, by drug_concept_id","drug_concept_id",,,, -1504,"Distribution of paid toward deductible, by drug_concept_id","drug_concept_id",,,, -1505,"Distribution of paid by payer, by drug_concept_id","drug_concept_id",,,, -1506,"Distribution of paid by coordination of benefit, by drug_concept_id","drug_concept_id",,,, -1507,"Distribution of total out-of-pocket, by drug_concept_id","drug_concept_id",,,, -1508,"Distribution of total paid, by drug_concept_id","drug_concept_id",,,, -1509,"Distribution of ingredient_cost, by drug_concept_id","drug_concept_id",,,, -1510,"Distribution of dispensing fee, by drug_concept_id","drug_concept_id",,,, -1511,"Distribution of average wholesale price, by drug_concept_id","drug_concept_id",,,, -1600,"Number of procedure cost records with invalid procedure occurrence id",,,,, -1601,"Number of procedure cost records with invalid payer plan period id",,,,, -1602,"Distribution of paid copay, by procedure_concept_id","procedure_concept_id",,,, -1603,"Distribution of paid coinsurance, by procedure_concept_id","procedure_concept_id",,,, -1604,"Distribution of paid toward deductible, by procedure_concept_id","procedure_concept_id",,,, -1605,"Distribution of paid by payer, by procedure_concept_id","procedure_concept_id",,,, -1606,"Distribution of paid by coordination of benefit, by procedure_concept_id","procedure_concept_id",,,, -1607,"Distribution of total out-of-pocket, by procedure_concept_id","procedure_concept_id",,,, -1608,"Distribution of total paid, by procedure_concept_id","procedure_concept_id",,,, -1610,"Number of records by revenue_code_concept_id","revenue_code_concept_id",,,, -1700,"Number of records by cohort_concept_id","cohort_concept_id",,,, -1701,"Number of records with cohort end date < cohort start date",,,,, -1800,"Number of persons with at least one measurement occurrence, by measurement_concept_id","measurement_concept_id",,,, -1801,"Number of measurement occurrence records, by measurement_concept_id","measurement_concept_id",,,, -1802,"Number of persons by measurement occurrence start month, by measurement_concept_id","measurement_concept_id","calendar month",,, -1803,"Number of distinct mesurement occurrence concepts per person",,,,, -1804,"Number of persons with at least one mesurement occurrence, by measurement_concept_id by calendar year by gender by age decile","measurement_concept_id","calendar year","gender_concept_id","age decile", -1805,"Number of measurement occurrence records, by measurement_concept_id by measurement_type_concept_id","measurement_concept_id","measurement_type_concept_id",,, -1806,"Distribution of age by measurement_concept_id","measurement_concept_id","gender_concept_id",,, -1807,"Number of measurement occurrence records, by measurement_concept_id and unit_concept_id","measurement_concept_id","unit_concept_id",,, -1809,"Number of measurement records with invalid 
person_id",,,,, -1810,"Number of measurement records outside valid observation period",,,,, -1812,"Number of measurement records with invalid provider_id",,,,, -1813,"Number of measurement records with invalid visit_id",,,,, -1814,"Number of measurement records with no value (numeric, string, or concept)",,,,, -1815,"Distribution of numeric values, by measurement_concept_id and unit_concept_id",,,,, -1816,"Distribution of low range, by measurement_concept_id and unit_concept_id",,,,, -1817,"Distribution of high range, by observation_concept_id and unit_concept_id",,,,, -1818,"Number of measurement records below/within/above normal range, by measurement_concept_id and unit_concept_id",,,,, -1820,"Number of measurement records by measurement start month","calendar month",,,, -1821,"Number of measurement records with no numeric value",,,,, -1891,"Percentage of total persons that have at least x measurements","measurement_concept_id","measurement_person",,, -1900,"Source values mapped to concept_id 0 by table, by source_value","table_name","source_value",,, -2000,"Number of patients with at least 1 Dx and 1 Rx",,,,, -2001,"Number of patients with at least 1 Dx and 1 Proc",,,,, -2002,"Number of patients with at least 1 Meas, 1 Dx and 1 Rx",,,,, -2003,"Number of patients with at least 1 Visit",,,,, -2100,"Number of persons with at least one device exposure, by device_concept_id","device_concept_id",,,, -2101,"Number of device exposure records, by device_concept_id","device_concept_id",,,, -2102,"Number of persons by device records start month, by device_concept_id","device_concept_id","calendar month",,, -2104,"Number of persons with at least one device exposure, by device_concept_id by calendar year by gender by age decile","device_concept_id","calendar year","gender_concept_id","age decile", -2105,"Number of device exposure records, by device_concept_id by device_type_concept_id","device_concept_id","device_type_concept_id",,, -2200,"Number of persons with at least one note by note_type_concept_id","note_type_concept_id",,,, -2201,"Number of note records, by note_type_concept_id","note_type_concept_id",,,, diff --git a/inst/csv/derived_analysis_details.csv b/inst/csv/derived_analysis_details.csv deleted file mode 100644 index 1581d4c7..00000000 --- a/inst/csv/derived_analysis_details.csv +++ /dev/null @@ -1,25 +0,0 @@ -measure_id,name,statistic_value_name,stratum_1_name,stratum_2_name,description,associated_rules -UnmappedDataByDomain:SourceValueCnt,Count of source values in unmapped data,count of source values,domain,,The measure analyzes how many source codes are unmapped.,34 -AgeAtFirstObsByDecile:DecileCnt,Count of deciles appearing in the data (at first observation),count of deciles,,,"The measure analyzes deciles of patients at their first observation. If only certain age groups are being observed, the count of deciles will be low.",33 -Provider:PatientProviderRatio,Patient Provider Ratio,ratio,,,"The measure looks at how many patients and how many providers are defined in the data. For example, the ratio may indicate abnormaly low number of providers.",31 -Meas:NoNumValue:Percentage,Percentage of rows in MEASUREMENT table that have NULL recorded as numerical value,percentage,,,The measure looks at data recorded in MESUREMENT table. 
A significant percentage of such rows typically contain a numerical result.,28 -UnmappedData:byDomain:Percentage,Percentage of rows that are unmapped,percentage,domain,,The measure looks at relative size of unmapped data.,27 -Provider:SpeciatlyCnt,Count of specialties found in the provider table,count of specialties,,,"The measure looks at how many different specialties are present. For general datasets, we expect at least some minimum number of specialties.",38 -DrugExposure:ConceptCnt,Count of distinct drug_concept_ids (drug_exposure),count of concepts,,,"Count of distinct drugs. For most datasets, a low number may indicate a data quality problem.", -DrugEra:ConceptCnt,Count of distinct drug_concept_ids (drug_era),count of concepts,,,Count of distinct drugs., -ach_2000:Percentage,Percentage of patients with at least 1 Dx and 1 Rx,percentage,,,Indicates patient with some minimum events in their record, -ach_2001:Percentage,Percentage of patients with at least 1 Dx and 1 Proc,percentage,,,Indicates patient with some minimum events in their record, -ach_2002:Percentage,"Percentage of patients with at least 1 Meas, 1 Dx and 1 Rx",percentage,,,Indicates patient with some minimum events in their record, -ach_2003:Percentage,Percentage of patients with at least 1 visit,percentage,,,Indicates patient with some minimum events in their record,32 -Achilles:byAnalysis:RowCnt,,count of rows,,,"Metadata about which measures were included when Achilles was last executed. Also allows count of types for certain domains (e.g., visit type). This is least sensitive data about a dataset. Pure metadata.", -Visit:Type:PersonWithAtLeastOne:byDecile:Percentage,Percentage of patients that have at least one visit by visity type,percentage,visit_concept_id,decile,The measure indicates which visit types are present in the dataset by decile using non-sensitive percentage view of count of persons., -Device:ConceptCnt,Count of distinct concepts (Device),count of concepts,,,"Count of distinct concepts. For most datasets, a low number may indicate a data quality problem.", -Measurement:ConceptCnt,Count of distinct concepts (Measurement),count of concepts,,,"Count of distinct concepts. For most datasets, a low number may indicate a data quality problem.", -Observation:ConceptCnt,Count of distinct concepts (Observation),count of concepts,,,"Count of distinct concepts. For most datasets, a low number may indicate a data quality problem.", -Procedure:ConceptCnt,Count of distinct concepts (Procedure),count of concepts,,,"Count of distinct concepts. For most datasets, a low number may indicate a data quality problem.", -Note:ConceptCnt,Count of distinct concepts (Note),count of concepts,,,"Count of distinct concepts. For most datasets, a low number may indicate a data quality problem.", -Death:DeathCause:ConceptCnt,Count of distinct concepts (Note),count of concepts,,,"Count of distinct concepts. For most datasets, a low number may indicate a data quality problem.", -Death:DeathType:ConceptCnt,Count of distinct concepts (Note),count of concepts,,,"Count of distinct concepts. For most datasets, a low number may indicate a data quality problem.", -Death:byYear:SafePatientCnt,Count of patients by year of death,count,calendar year,,Safe patient count indicates that low counts will not be included in the measure, -Death:byDecade:SafePatientCnt,Count of patients by decade,count,calendar decade,,"Count of deaths by calendar decade (e.g., 1990s, 2000s,2100s). Large aggretion by decade is a less sensitive measure to report. 
", -Death:BornDeceasedRatio,Ratio of born persons to deceased persons by year,ratio,calendar year,,,39 diff --git a/inst/csv/export/all_reports.csv b/inst/csv/export/all_reports.csv new file mode 100644 index 00000000..cc06bed5 --- /dev/null +++ b/inst/csv/export/all_reports.csv @@ -0,0 +1,16 @@ +REPORT +CONDITION +CONDITION_ERA +DASHBOARD +DATA_DENSITY +DEATH +DRUG +DRUG_ERA +HEEL +OBSERVATION +OBSERVATION_PERIOD +PERSON +PROCEDURE +VISIT +MEASUREMENT +META diff --git a/inst/csv/heel/heel_results_derived_details.csv b/inst/csv/heel/heel_results_derived_details.csv new file mode 100644 index 00000000..ebd4fd71 --- /dev/null +++ b/inst/csv/heel/heel_results_derived_details.csv @@ -0,0 +1,32 @@ +query_id,measure_id,name,statistic_value_name,stratum_1_name,stratum_2_name,description,associated_rules +1,UnmappedDataByDomain:SourceValueCnt,Count of source values in unmapped data,count of source values,domain,,The measure analyzes how many source codes are unmapped.,34 +2,AgeAtFirstObsByDecile:DecileCnt,Count of deciles appearing in the data (at first observation),count of deciles,,,"The measure analyzes deciles of patients at their first observation. If only certain age groups are being observed, the count of deciles will be low.",33 +3,Provider:PatientProviderRatio,Patient Provider Ratio,ratio,,,"The measure looks at how many patients and how many providers are defined in the data. For example, the ratio may indicate abnormaly low number of providers.",31 +4,Meas:NoNumValue:Percentage,Percentage of rows in MEASUREMENT table that have NULL recorded as numerical value,percentage,,,The measure looks at data recorded in MESUREMENT table. A significant percentage of such rows typically contain a numerical result.,28 +28,UnmappedData:byDomain:Percentage,Percentage of rows that are unmapped,percentage,domain,,The measure looks at relative size of unmapped data.,27 +6,Provider:SpeciatlyCnt,Count of specialties found in the provider table,count of specialties,,,"The measure looks at how many different specialties are present. For general datasets, we expect at least some minimum number of specialties.",38 +7,DrugExposure:ConceptCnt,Count of distinct drug_concept_ids (drug_exposure),count of concepts,,,"Count of distinct drugs. For most datasets, a low number may indicate a data quality problem.", +8,DrugEra:ConceptCnt,Count of distinct drug_concept_ids (drug_era),count of concepts,,,Count of distinct drugs., +27,ach_2000:Percentage,Percentage of patients with at least 1 Dx and 1 Rx,percentage,,,Indicates patient with some minimum events in their record, +27,ach_2001:Percentage,Percentage of patients with at least 1 Dx and 1 Proc,percentage,,,Indicates patient with some minimum events in their record, +27,ach_2002:Percentage,"Percentage of patients with at least 1 Meas, 1 Dx and 1 Rx",percentage,,,Indicates patient with some minimum events in their record, +27,ach_2003:Percentage,Percentage of patients with at least 1 visit,percentage,,,Indicates patient with some minimum events in their record,32 +13,Achilles:byAnalysis:RowCnt,,count of rows,,,"Metadata about which measures were included when Achilles was last executed. Also allows count of types for certain domains (e.g., visit type). This is least sensitive data about a dataset. 
Pure metadata.", +14,Visit:Type:PersonWithAtLeastOne:byDecile:Percentage,Percentage of patients that have at least one visit by visit type,percentage,visit_concept_id,decile,The measure indicates which visit types are present in the dataset by decile using a non-sensitive percentage view of the count of persons., +15,Device:ConceptCnt,Count of distinct concepts (Device),count of concepts,,,"Count of distinct concepts. For most datasets, a low number may indicate a data quality problem.", +16,Measurement:ConceptCnt,Count of distinct concepts (Measurement),count of concepts,,,"Count of distinct concepts. For most datasets, a low number may indicate a data quality problem.", +17,Observation:ConceptCnt,Count of distinct concepts (Observation),count of concepts,,,"Count of distinct concepts. For most datasets, a low number may indicate a data quality problem.", +18,Procedure:ConceptCnt,Count of distinct concepts (Procedure),count of concepts,,,"Count of distinct concepts. For most datasets, a low number may indicate a data quality problem.", +19,Note:ConceptCnt,Count of distinct concepts (Note),count of concepts,,,"Count of distinct concepts. For most datasets, a low number may indicate a data quality problem.", +20,Death:DeathCause:ConceptCnt,Count of distinct concepts (Death Cause),count of concepts,,,"Count of distinct concepts. For most datasets, a low number may indicate a data quality problem.", +21,Death:DeathType:ConceptCnt,Count of distinct concepts (Death Type),count of concepts,,,"Count of distinct concepts. For most datasets, a low number may indicate a data quality problem.", +22,Death:byYear:SafePatientCnt,Count of patients by year of death,count,calendar year,,Safe patient count indicates that low counts will not be included in the measure, +23,Death:byDecade:SafePatientCnt,Count of patients by decade,count,calendar decade,,"Count of deaths by calendar decade (e.g., 1990s, 2000s, 2010s). Large aggregation by decade is a less sensitive measure to report. ", +24,Death:BornDeceasedRatio,Ratio of born persons to deceased persons by year,ratio,calendar year,,,39 +25,GlobalCnt,,,,,, +26,GlobalRowCnt,,,,,, +29,Visit_InstanceCnt,,,,,, +30,Condition:ConceptCnt,Count of distinct concepts (Condition),count of concepts,,,"Count of distinct concepts. For most datasets, a low number may indicate a data quality problem.", +31,Visit:ConceptCnt,Count of distinct concepts (Visit),count of concepts,,,"Count of distinct concepts. For most datasets, a low number may indicate a data quality problem.", +32,Person:Ethnicity:ConceptCnt,Count of distinct concepts (ethnicity),count of concepts,,,"Count of distinct concepts. For most datasets, a low number may indicate a data quality problem.", +33,Person:Race:ConceptCnt,Count of distinct concepts (race),count of concepts,,,"Count of distinct concepts. 
For most datasets, a low number may indicate a data quality problem.", diff --git a/inst/csv/heel/heel_rules_all.csv b/inst/csv/heel/heel_rules_all.csv new file mode 100644 index 00000000..b0d48c62 --- /dev/null +++ b/inst/csv/heel/heel_rules_all.csv @@ -0,0 +1,43 @@ +rule_id,rule_name,execution_type,destination_table,severity,rule_type,rule_description,threshold,rule_classification,rule_scope,linked_measure +1,multiple checks for greater than zero,parallel,heel_results,error,DQ,umbrella rule: this rule includes multiple error checks on over 35 analysis_ids,>0,complex,, +2,multiple checks where minimum value of a measure should not be negative,parallel,heel_results,error,DQ,umbrella rule: this rule includes multiple error checks on over 20 analysis_ids where min value in distribution should not be negative,,complex,, +3,multiple checks related to death data where maximum value of a measure should not be positive,parallel,heel_results,warning,DQ,death distributions where max should not be positive (using analyses 511;512;513;514;515),,plausibility,, +4,invalid concept_id,parallel,heel_results,error,CDM conformance,invalid concept_id,,,, +5,invalid type concept_id,parallel,heel_results,error,CDM conformance,invalid type concept_id,,,, +6,data with unmapped concepts,parallel,heel_results,warning,DQ,for multiple analyses,,,, +7,concept from the wrong vocabulary,parallel,heel_results,error,CDM conformance,concept from the wrong vocabulary,,,,2 +8,concept from the wrong vocabulary; race,parallel,heel_results,error,CDM conformance,concept from the wrong vocabulary; race,,,,4 +9,concept from the wrong vocabulary; ethnicity,parallel,heel_results,error,CDM conformance,concept from the wrong vocabulary; ethnicity,,,,5 +10,concept from the wrong vocabulary; place of service,parallel,heel_results,error,CDM conformance,concept from the wrong vocabulary; place of service,,,,202 +11,incorrect terminology,parallel,heel_results,error,CDM conformance,specialty - 48 specialty,,,,301 +12,Dx is not a SNOMED code,parallel,heel_results,error,CDM conformance,"concept from the wrong vocabulary; Condition Occurrence, Condition Era (SNOMED)",,,,400;1000 +13,Drug is not RxNorm concept,parallel,heel_results,error,CDM conformance,"concept from the wrong vocabulary; Drug Exposure, Drug Era (RxNorm)",,,,700;900 +14,"Procedure is not CPT, ICD9Proc or HCPCS",parallel,heel_results,error,CDM conformance,procedure - 4 CPT4/5 HCPCS/3 ICD9P,,,,600 +17,incorrect terminology,parallel,heel_results,error,CDM conformance,revenue code - 43 revenue code,,,,1610 +18,year of birth is in the future,parallel,heel_results,error,DQ,year of birth should not be in the future,,plausibility,,3 +19,year of birth is prior to 1800,parallel,heel_results,warning,DQ,year of birth < 1800,<1800,plausibility,,3 +20,age below 0,parallel,heel_results,error,DQ,age < 0,<0,plausibility,,101 +21,age too high,parallel,heel_results,error,DQ,age > 150,>150,plausibility,,101 +22,monthly trend,parallel,heel_results,warning,DQ,monthly change > 100%,,fidelity,, +23,monthly trend,parallel,heel_results,warning,DQ,monthly change > 100% at concept level,,fidelity,, +24,too high days_supply,parallel,heel_results,warning,DQ,days_supply > 180,,plausibility,, +25,too high number of refills,parallel,heel_results,warning,DQ,refills > 10,>10,plausibility,,716 +26,implausible quantity for drug,parallel,heel_results,warning,DQ,quantity > 600,>600,plausibility,,717 +27,more than 1 percent of unmapped rows (concept_0 rows),serial,both,warning,DQ,for multiple analyses 
(4xx;6xx;7xx;8xx;18xx),>1,completeness,,UnmappedData:byDomain:Percentage +28,percentage of non-numerical measurement records exceeds general population threshold,serial,both,warning,DQ,"typically, measurement data contains a significant proportion of rows with a numerical result. This rule looks at rows in MEASUREMENT and alerts the user if a large proportion of rows lack any numerical result",>=80,completeness,GeneralPopulationOnly,Meas:NoNumValue:Percentage +29,infant diagnosis at senior age of over 50yo,serial,heel_results,error,DQ,meconium condition 195075; this rule is an example of a terminology-dependent data quality check,,plausibility,,404 +31,ratio of providers to total patients,serial,both,notification,DQ,This rule fires if the data indicate a high number of patients but only a few providers exist. ,,plausibility,,Provider:PatientProviderRatio +32,Percentage of patients with no visits exceeds threshold,serial,heel_results,notification,DQ,checks if the percentage of patients with no visits exceeds threshold,>5,plausibility,,ach_2003:Percentage +33,[GeneralPopulationOnly] Not all deciles represented at first observation,serial,heel_results,notification,DQ,"in a general population, a database would observe a first visit across all age groups. We at least expect deciles 0 to 8. The rule looks at the count of deciles.",<9,completeness,GeneralPopulationOnly,AgeAtFirstObsByDecile:DecileCnt +34,Count of unmapped source values in a domain exceeds threshold,serial,heel_results,notification,DQ,"looks at values that are mapped to concept 0 and their source values by table; rule 6 is related to this rule but it does not look at the size of the problem (only if unmapped data are present or not present)",,completeness,,UnmappedDataByDomain:SourceValueCnt +35,Count of measurement_ids with more than 5 distinct units exceeds threshold,serial,heel_results,notification,DQ,"Ideally, each measurement would use only one unit. For example, kg for weight. This rule notifies the user if the database has measurements that have 5 or more units. This rule technically has two thresholds. 
",>=5;>=10,fidelity,,1807 +36,age too high2,serial,heel_results,warning,DQ,age > 125; same as rule 21 but as warning flavor; same threshold is used by Sentinel DQA,>125,plausibility:temporal,,101 +37,Notes data density,serial,both,notification,DQ,"This rule is measuring data density on visit level (in addition to patient and dataset level); Assumption is that at least one data event (e.g., diagnosis, note) is generated for each visit; This rule is testing that at least some notes exist (considering the number of visits); for datasets with zero notes the derived measure is null and rule does not fire at all; current rule is on overall data density (for notes only) per visit level",<0.01,completeness,,Note:NoteVisitRatio +38,Provider Specialty data density,serial,heel_results,notification,DQ,"in a general dataset, it is expected that more than providers with a wide range of specialties (at least more than just one specialty) is present; notification may indicate that provider table is missing data on specialty,typical datat has at least 28 specialties present in provider table",<2,completeness,GeneralPopulationOnly,Provider:SpecialtyCnt +39,Too high Born to Deceased Ratio in some years,serial,heel_results,notification,DQ,"Given lifetime record DQ assumption if more than 30k patients is born for every deceased patient the dataset may not be recording complete records for all senior patients in that year +",>30000,completeness,GeneralPopulationOnly,Death:BornDeceasedRatio +40,Death event outside observation period,serial,heel_results,error,DQ,death event should not be outside observation period; this rule was if previous versions subsumed in umbrella rule,,completeness,,510 +41,No weight data in MEASUREMENT table,serial,heel_results,notification,DQ,implementation of similar Sentinel rule for certain vital signs; rule lukes at concept_id 3025315 (LOINC code 29463-7)),,completeness,,1800 +42,Percentage of outpatient visits is too low,serial,heel_results,notification,DQ,"Rule is looking at percentage of outpatient visits. If this measure is too low (e.g. 5 percent), it may indicate a predominantly inpatient dataset. Threshold was decided on DQ-Study 2. General population only rule.",<0.42,completeness,GeneralPopulationOnly,201 +43,99+ percent of persons have exactly one observation period,serial,heel_results,notification,DQ,Some datasets cannot provide observation period data based on health insurance start and stop dates. Rule notifies a user if 99+% of patients have exactly one observation period. ,>=99.0,completeness,,113 +44,"Percentage of patients with at least 1 Measurement, 1 Dx and 1 Rx is below threshold",serial,heel_results,notification,DQ,This notification may indicate that a significant percentage of patients is missing data for either Measurement or Diagnosis or Medication. Many clinical studies may want to require at least some data in all three domains. 
Threshold was decided empirically in OHDSI DQ Study ,,completeness,,2002 diff --git a/inst/csv/rule_drill_down.csv b/inst/csv/heel/heel_rules_drilldown.csv similarity index 100% rename from inst/csv/rule_drill_down.csv rename to inst/csv/heel/heel_rules_drilldown.csv diff --git a/inst/csv/schemas/schema_achilles_results.csv b/inst/csv/schemas/schema_achilles_results.csv new file mode 100644 index 00000000..0c37e150 --- /dev/null +++ b/inst/csv/schemas/schema_achilles_results.csv @@ -0,0 +1,8 @@ +FIELD_NAME,FIELD_TYPE +analysis_id,int +stratum_1,varchar(255) +stratum_2,varchar(255) +stratum_3,varchar(255) +stratum_4,varchar(255) +stratum_5,varchar(255) +count_value,bigint \ No newline at end of file diff --git a/inst/csv/schemas/schema_achilles_results_dist.csv b/inst/csv/schemas/schema_achilles_results_dist.csv new file mode 100644 index 00000000..b9fb3f7a --- /dev/null +++ b/inst/csv/schemas/schema_achilles_results_dist.csv @@ -0,0 +1,17 @@ +FIELD_NAME,FIELD_TYPE +analysis_id,int +stratum_1,varchar(255) +stratum_2,varchar(255) +stratum_3,varchar(255) +stratum_4,varchar(255) +stratum_5,varchar(255) +count_value,bigint +min_value,float +max_value,float +avg_value,float +stdev_value,float +median_value,float +p10_value,float +p25_value,float +p75_value,float +p90_value,float \ No newline at end of file diff --git a/inst/sql/sql_server/AchillesHeel_v4.sql b/inst/sql/sql_server/AchillesHeel_v4.sql deleted file mode 100644 index 07accdfc..00000000 --- a/inst/sql/sql_server/AchillesHeel_v4.sql +++ /dev/null @@ -1,770 +0,0 @@ -/****************************************************************** - -# @file ACHILLESHEEL.SQL -# -# Copyright 2014 Observational Health Data Sciences and Informatics -# -# This file is part of ACHILLES -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
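With the Heel rules now externalized to inst/csv/heel/heel_rules_all.csv, the execution_type, threshold, and linked_measure of every rule can be inspected directly from R. Below is a minimal sketch of that idea, assuming only base R and an installed Achilles package; the filtering logic is illustrative and is not the package's actual rule-dispatch code.

# Minimal sketch: list the serial Heel rules that carry both a threshold and a
# linked derived measure, using the CSV added in this changeset. Assumes the
# Achilles package is installed so system.file() can locate the file.
rulesFile <- system.file("csv", "heel", "heel_rules_all.csv", package = "Achilles")
rules <- read.csv(rulesFile, stringsAsFactors = FALSE)

serialRules <- rules[rules$execution_type == "serial" &
                       rules$threshold != "" &
                       rules$linked_measure != "", ]

# For example, rule 42 links the threshold <0.42 to measure 201
# (percentage of outpatient visits).
serialRules[, c("rule_id", "rule_name", "threshold", "linked_measure")]
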
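The schema CSVs under inst/csv/schemas/ spell out the column layout of the achilles_results and achilles_results_dist tables. As a rough illustration of how such a file could drive table creation, here is a base-R sketch; the helper name buildResultsTableSql and the plain string assembly are assumptions for illustration only (the package would presumably render and translate the final DDL with SqlRender for the target dialect), and the column names and types come from schema_achilles_results.csv above.

# Minimal sketch (illustrative only): turn schema_achilles_results.csv into a
# CREATE TABLE statement for a given results schema. Field names and types are
# read from the CSV added in this changeset.
buildResultsTableSql <- function(resultsDatabaseSchema,
                                 schemaFile = system.file("csv", "schemas",
                                                          "schema_achilles_results.csv",
                                                          package = "Achilles")) {
  fields <- read.csv(schemaFile, stringsAsFactors = FALSE)
  columnDefs <- paste(sprintf("  %s %s", fields$FIELD_NAME, fields$FIELD_TYPE),
                      collapse = ",\n")
  sprintf("CREATE TABLE %s.achilles_results (\n%s\n);", resultsDatabaseSchema, columnDefs)
}

# Example: print the DDL for a hypothetical "results" schema.
cat(buildResultsTableSql("results"))
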
-# -# @author Observational Health Data Sciences and Informatics -# -# 2015-12-18 CHANGE: this file will first contain v4 specific checks and then will include copy -# of the v5 file rules that also apply to be similar to - - - -*******************************************************************/ - - -/******************************************************************* - -Achilles Heel - data quality assessment based on database profiling summary statistics - -SQL for ACHILLES results (for either OMOP CDM v4) - - -*******************************************************************/ - -{DEFAULT @cdm_database_schema = 'CDM.dbo'} -{DEFAULT @results_database = 'scratch'} -{DEFAULT @source_name = 'CDM NAME'} -{DEFAULT @smallcellcount = 5} -{DEFAULT @createTable = TRUE} - - ---Achilles_Heel part: -IF OBJECT_ID('@results_database_schema.ACHILLES_HEEL_results', 'U') IS NOT NULL - DROP TABLE @results_database_schema.ACHILLES_HEEL_results; - -CREATE TABLE @results_database_schema.ACHILLES_HEEL_results ( - analysis_id INT, - ACHILLES_HEEL_warning VARCHAR(255), - rule_id INT, - record_count BIGINT -); - - - ---V4 specific rules start here - ---concept from the wrong vocabulary ---ruleid 7 gender - 12 HL7 -INSERT INTO @results_database_schema.ACHILLES_HEEL_results ( - analysis_id, - ACHILLES_HEEL_warning, - rule_id, - record_count - ) -SELECT or1.analysis_id, - 'ERROR: ' + cast(or1.analysis_id as VARCHAR) + '-' + oa1.analysis_name + '; ' + cast(COUNT_BIG(DISTINCT stratum_1) AS VARCHAR) + ' concepts in data are not in correct vocabulary' AS ACHILLES_HEEL_warning, - 7 as rule_id, - COUNT_BIG(DISTINCT stratum_1) as record_count -FROM @results_database_schema.ACHILLES_results or1 -INNER JOIN @results_database_schema.ACHILLES_analysis oa1 - ON or1.analysis_id = oa1.analysis_id -INNER JOIN @vocab_database_schema.concept c1 - ON or1.stratum_1 = CAST(c1.concept_id AS VARCHAR) -WHERE or1.analysis_id IN (2) - AND or1.stratum_1 IS NOT NULL - AND c1.concept_id <> 0 - and c1.vocabulary_id NOT IN (0,12) -GROUP BY or1.analysis_id, - oa1.analysis_name; - ---ruleid 8 race - 13 CDC Race -INSERT INTO @results_database_schema.ACHILLES_HEEL_results ( - analysis_id, - ACHILLES_HEEL_warning, - rule_id, - record_count - ) -SELECT or1.analysis_id, - 'ERROR: ' + cast(or1.analysis_id as VARCHAR) + '-' + oa1.analysis_name + '; ' + cast(COUNT_BIG(DISTINCT stratum_1) AS VARCHAR) + ' concepts in data are not in correct vocabulary' AS ACHILLES_HEEL_warning, - 8 as rule_id, - COUNT_BIG(DISTINCT stratum_1) as record_count -FROM @results_database_schema.ACHILLES_results or1 -INNER JOIN @results_database_schema.ACHILLES_analysis oa1 - ON or1.analysis_id = oa1.analysis_id -INNER JOIN @vocab_database_schema.concept c1 - ON or1.stratum_1 = CAST(c1.concept_id AS VARCHAR) -WHERE or1.analysis_id IN (4) - AND or1.stratum_1 IS NOT NULL - AND c1.concept_id <> 0 - AND c1.vocabulary_id NOT IN (0,13) -GROUP BY or1.analysis_id, - oa1.analysis_name; - ---ruleid 9 ethnicity - 44 ethnicity -INSERT INTO @results_database_schema.ACHILLES_HEEL_results ( - analysis_id, - ACHILLES_HEEL_warning, - rule_id, - record_count - ) -SELECT or1.analysis_id, - 'ERROR: ' + cast(or1.analysis_id as VARCHAR) + '-' + oa1.analysis_name + '; ' + cast(COUNT_BIG(DISTINCT stratum_1) AS VARCHAR) + ' concepts in data are not in correct vocabulary (CMS Ethnicity)' AS ACHILLES_HEEL_warning, - 9 as rule_id, - COUNT_BIG(DISTINCT stratum_1) as record_count -FROM @results_database_schema.ACHILLES_results or1 -INNER JOIN @results_database_schema.ACHILLES_analysis oa1 - ON 
or1.analysis_id = oa1.analysis_id -INNER JOIN @vocab_database_schema.concept c1 - ON or1.stratum_1 = CAST(c1.concept_id AS VARCHAR) -WHERE or1.analysis_id IN (5) - AND or1.stratum_1 IS NOT NULL - AND c1.concept_id <> 0 - AND c1.vocabulary_id NOT IN (0,44) -GROUP BY or1.analysis_id, - oa1.analysis_name; - ---ruleid 10 place of service - 14 CMS place of service, 24 OMOP visit -INSERT INTO @results_database_schema.ACHILLES_HEEL_results ( - analysis_id, - ACHILLES_HEEL_warning, - rule_id, - record_count - ) -SELECT or1.analysis_id, - 'ERROR: ' + cast(or1.analysis_id as VARCHAR) + '-' + oa1.analysis_name + '; ' + cast(COUNT_BIG(DISTINCT stratum_1) AS VARCHAR) + ' concepts in data are not in correct vocabulary' AS ACHILLES_HEEL_warning, - 10 as rule_id, - COUNT_BIG(DISTINCT stratum_1) as record_count -FROM @results_database_schema.ACHILLES_results or1 -INNER JOIN @results_database_schema.ACHILLES_analysis oa1 - ON or1.analysis_id = oa1.analysis_id -INNER JOIN @vocab_database_schema.concept c1 - ON or1.stratum_1 = CAST(c1.concept_id AS VARCHAR) -WHERE or1.analysis_id IN (202) - AND or1.stratum_1 IS NOT NULL - AND c1.concept_id <> 0 - and c1.vocabulary_id NOT IN (0,14,24) -GROUP BY or1.analysis_id, - oa1.analysis_name; - ---ruleid 11 specialty - 48 specialty -INSERT INTO @results_database_schema.ACHILLES_HEEL_results ( - analysis_id, - ACHILLES_HEEL_warning, - rule_id, - record_count - ) -SELECT or1.analysis_id, - 'ERROR: ' + cast(or1.analysis_id as VARCHAR) + '-' + oa1.analysis_name + '; ' + cast(COUNT_BIG(DISTINCT stratum_1) AS VARCHAR) + ' concepts in data are not in correct vocabulary (Specialty)' AS ACHILLES_HEEL_warning, - 11 as rule_id, - COUNT_BIG(DISTINCT stratum_1) as record_count -FROM @results_database_schema.ACHILLES_results or1 -INNER JOIN @results_database_schema.ACHILLES_analysis oa1 - ON or1.analysis_id = oa1.analysis_id -INNER JOIN @vocab_database_schema.concept c1 - ON or1.stratum_1 = CAST(c1.concept_id AS VARCHAR) -WHERE or1.analysis_id IN (301) - AND or1.stratum_1 IS NOT NULL - AND c1.concept_id <> 0 - AND c1.vocabulary_id NOT IN (0,48) -GROUP BY or1.analysis_id, - oa1.analysis_name; - ---ruleid 12 condition occurrence, era - 1 SNOMED -INSERT INTO @results_database_schema.ACHILLES_HEEL_results ( - analysis_id, - ACHILLES_HEEL_warning, - rule_id, - record_count - ) -SELECT or1.analysis_id, - 'ERROR: ' + cast(or1.analysis_id as VARCHAR) + '-' + oa1.analysis_name + '; ' + cast(COUNT_BIG(DISTINCT stratum_1) AS VARCHAR) + ' concepts in data are not in correct vocabulary' AS ACHILLES_HEEL_warning, - 12 as rule_id, - COUNT_BIG(DISTINCT stratum_1) as record_count -FROM @results_database_schema.ACHILLES_results or1 -INNER JOIN @results_database_schema.ACHILLES_analysis oa1 - ON or1.analysis_id = oa1.analysis_id -INNER JOIN @vocab_database_schema.concept c1 - ON or1.stratum_1 = CAST(c1.concept_id AS VARCHAR) -WHERE or1.analysis_id IN ( - 400, - 1000 - ) - AND or1.stratum_1 IS NOT NULL - AND c1.concept_id <> 0 - AND c1.vocabulary_id NOT IN (0,1) -GROUP BY or1.analysis_id, - oa1.analysis_name; - ---ruleid 13 drug exposure - 8 RxNorm/82 RxNorm Extension -INSERT INTO @results_database_schema.ACHILLES_HEEL_results ( - analysis_id, - ACHILLES_HEEL_warning, - rule_id, - record_count - ) -SELECT or1.analysis_id, - 'ERROR: ' + cast(or1.analysis_id as VARCHAR) + '-' + oa1.analysis_name + '; ' + cast(COUNT_BIG(DISTINCT stratum_1) AS VARCHAR) + ' concepts in data are not in correct vocabulary' AS ACHILLES_HEEL_warning, - 13 as rule_id, - COUNT_BIG(DISTINCT stratum_1) as record_count -FROM 
@results_database_schema.ACHILLES_results or1 -INNER JOIN @results_database_schema.ACHILLES_analysis oa1 - ON or1.analysis_id = oa1.analysis_id -INNER JOIN @vocab_database_schema.concept c1 - ON or1.stratum_1 = CAST(c1.concept_id AS VARCHAR) -WHERE or1.analysis_id IN ( - 700, - 900 - ) - AND or1.stratum_1 IS NOT NULL - AND c1.concept_id <> 0 - AND c1.vocabulary_id NOT IN (0,8,82) -GROUP BY or1.analysis_id, - oa1.analysis_name; - ---ruleid 14 procedure - 4 CPT4/5 HCPCS/3 ICD9P -INSERT INTO @results_database_schema.ACHILLES_HEEL_results ( - analysis_id, - ACHILLES_HEEL_warning, - rule_id, - record_count - ) -SELECT or1.analysis_id, - 'ERROR: ' + cast(or1.analysis_id as VARCHAR) + '-' + oa1.analysis_name + '; ' + cast(COUNT_BIG(DISTINCT stratum_1) AS VARCHAR) + ' concepts in data are not in correct vocabulary' AS ACHILLES_HEEL_warning, - 14 as rule_id, - COUNT_BIG(DISTINCT stratum_1) as record_count -FROM @results_database_schema.ACHILLES_results or1 -INNER JOIN @results_database_schema.ACHILLES_analysis oa1 - ON or1.analysis_id = oa1.analysis_id -INNER JOIN @vocab_database_schema.concept c1 - ON or1.stratum_1 = CAST(c1.concept_id AS VARCHAR) -WHERE or1.analysis_id IN (600) - AND or1.stratum_1 IS NOT NULL - AND c1.concept_id <> 0 - AND c1.vocabulary_id NOT IN (0,3,4,5) -GROUP BY or1.analysis_id, - oa1.analysis_name; - ---ruleid 15 observation - 6 LOINC -INSERT INTO @results_database_schema.ACHILLES_HEEL_results ( - analysis_id, - ACHILLES_HEEL_warning, - rule_id, - record_count - ) -SELECT or1.analysis_id, - 'ERROR: ' + cast(or1.analysis_id as VARCHAR) + '-' + oa1.analysis_name + '; ' + cast(COUNT_BIG(DISTINCT stratum_1) AS VARCHAR) + ' concepts in data are not in correct vocabulary (LOINC)' AS ACHILLES_HEEL_warning, - 15 as rule_id, - COUNT_BIG(DISTINCT stratum_1) as record_count -FROM @results_database_schema.ACHILLES_results or1 -INNER JOIN @results_database_schema.ACHILLES_analysis oa1 - ON or1.analysis_id = oa1.analysis_id -INNER JOIN @vocab_database_schema.concept c1 - ON or1.stratum_1 = CAST(c1.concept_id AS VARCHAR) -WHERE or1.analysis_id IN (800) - AND or1.stratum_1 IS NOT NULL - AND c1.vocabulary_id NOT IN (0,6) -GROUP BY or1.analysis_id, - oa1.analysis_name; - - ---ruleid 16 disease class - 40 DRG -INSERT INTO @results_database_schema.ACHILLES_HEEL_results ( - analysis_id, - ACHILLES_HEEL_warning, - rule_id, - record_count - ) -SELECT or1.analysis_id, - 'ERROR: ' + cast(or1.analysis_id as VARCHAR) + '-' + oa1.analysis_name + '; ' + cast(COUNT_BIG(DISTINCT stratum_1) AS VARCHAR) + ' concepts in data are not in correct vocabulary (DRG)' AS ACHILLES_HEEL_warning, - 16 as rule_id, - COUNT_BIG(DISTINCT stratum_1) as record_count -FROM @results_database_schema.ACHILLES_results or1 -INNER JOIN @results_database_schema.ACHILLES_analysis oa1 - ON or1.analysis_id = oa1.analysis_id -INNER JOIN @vocab_database_schema.concept c1 - ON or1.stratum_1 = CAST(c1.concept_id AS VARCHAR) -WHERE or1.analysis_id IN (1609) - AND or1.stratum_1 IS NOT NULL - AND c1.vocabulary_id NOT IN ( - 0, - 40 - ) -GROUP BY or1.analysis_id, - oa1.analysis_name; - - ---copy of rules from v5 file that also aply to v4 - - ---ruleid 1 check for non-zero counts from checks of improper data (invalid ids, out-of-bound data, inconsistent dates) -INSERT INTO @results_database_schema.ACHILLES_HEEL_results ( - analysis_id, - ACHILLES_HEEL_warning, - rule_id, - record_count - ) -SELECT DISTINCT or1.analysis_id, - 'ERROR: ' + cast(or1.analysis_id as VARCHAR) + '-' + oa1.analysis_name + '; count (n=' + cast(or1.count_value as 
VARCHAR) + ') should not be > 0' AS ACHILLES_HEEL_warning, - 1 as rule_id, - or1.count_value -FROM @results_database_schema.ACHILLES_results or1 -INNER JOIN @results_database_schema.ACHILLES_analysis oa1 - ON or1.analysis_id = oa1.analysis_id -WHERE or1.analysis_id IN ( - 7, - 8, - 9, - 114, - 115, - 118, - 207, - 208, - 209, - 210, - 302, - 409, - 410, - 411, - 412, - 413, - 509, - 510, - 609, - 610, - 612, - 613, - 709, - 710, - 711, - 712, - 713, - 809, - 810, - 812, - 813, - 814, - 908, - 909, - 910, - 1008, - 1009, - 1010, - 1415, - 1500, - 1501, - 1600, - 1601, - 1701 - ) --all explicit counts of data anamolies - AND or1.count_value > 0; - ---ruleid 2 distributions where min should not be negative -INSERT INTO @results_database_schema.ACHILLES_HEEL_results ( - analysis_id, - ACHILLES_HEEL_warning, - rule_id, - record_count - ) -SELECT DISTINCT ord1.analysis_id, - 'ERROR: ' + cast(ord1.analysis_id as VARCHAR) + ' - ' + oa1.analysis_name + ' (count = ' + cast(COUNT_BIG(ord1.min_value) as VARCHAR) + '); min value should not be negative' AS ACHILLES_HEEL_warning, - 2 as rule_id, - COUNT_BIG(ord1.min_value) as record_count -FROM @results_database_schema.ACHILLES_results_dist ord1 -INNER JOIN @results_database_schema.ACHILLES_analysis oa1 - ON ord1.analysis_id = oa1.analysis_id -WHERE ord1.analysis_id IN ( - 103, - 105, - 206, - 406, - 506, - 606, - 706, - 715, - 716, - 717, - 806, - 906, - 907, - 1006, - 1007, - 1502, - 1503, - 1504, - 1505, - 1506, - 1507, - 1508, - 1509, - 1510, - 1511, - 1602, - 1603, - 1604, - 1605, - 1606, - 1607, - 1608 - ) - AND ord1.min_value < 0 - GROUP BY ord1.analysis_id, oa1.analysis_name; - ---ruleid 3 death distributions where max should not be positive -INSERT INTO @results_database_schema.ACHILLES_HEEL_results ( - analysis_id, - ACHILLES_HEEL_warning, - rule_id, - record_count -) -SELECT DISTINCT ord1.analysis_id, - 'WARNING: ' + cast(ord1.analysis_id as VARCHAR) + '-' + oa1.analysis_name + ' (count = ' + cast(COUNT_BIG(ord1.max_value) as VARCHAR) + '); max value should not be positive, otherwise its a zombie with data >1mo after death ' AS ACHILLES_HEEL_warning, - 3 as rule_id, - COUNT_BIG(ord1.max_value) as record_count -FROM @results_database_schema.ACHILLES_results_dist ord1 -INNER JOIN @results_database_schema.ACHILLES_analysis oa1 - ON ord1.analysis_id = oa1.analysis_id -WHERE ord1.analysis_id IN ( - 511, - 512, - 513, - 514, - 515 - ) - AND ord1.max_value > 30 -GROUP BY ord1.analysis_id, oa1.analysis_name; - ---ruleid 4 invalid concept_id -INSERT INTO @results_database_schema.ACHILLES_HEEL_results ( - analysis_id, - ACHILLES_HEEL_warning, - rule_id, - record_count -) -SELECT or1.analysis_id, - 'ERROR: ' + cast(or1.analysis_id as VARCHAR) + '-' + oa1.analysis_name + '; ' + cast(COUNT_BIG(DISTINCT stratum_1) AS VARCHAR) + ' concepts in data are not in vocabulary' AS ACHILLES_HEEL_warning, - 4 as rule_id, - COUNT_BIG(DISTINCT stratum_1) as record_count -FROM @results_database_schema.ACHILLES_results or1 -INNER JOIN @results_database_schema.ACHILLES_analysis oa1 - ON or1.analysis_id = oa1.analysis_id -LEFT JOIN @vocab_database_schema.concept c1 - ON or1.stratum_1 = CAST(c1.concept_id AS VARCHAR) -WHERE or1.analysis_id IN ( - 2, - 4, - 5, - 200, - 301, - 400, - 500, - 505, - 600, - 700, - 800, - 900, - 1000, - 1609, - 1610 - ) - AND or1.stratum_1 IS NOT NULL - AND c1.concept_id IS NULL -GROUP BY or1.analysis_id, - oa1.analysis_name; - ---ruleid 5 invalid type concept_id -INSERT INTO @results_database_schema.ACHILLES_HEEL_results ( - analysis_id, - 
ACHILLES_HEEL_warning, - rule_id, - record_count - ) -SELECT or1.analysis_id, - 'ERROR: ' + cast(or1.analysis_id as VARCHAR) + '-' + oa1.analysis_name + '; ' + cast(COUNT_BIG(DISTINCT stratum_2) AS VARCHAR) + ' concepts in data are not in vocabulary' AS ACHILLES_HEEL_warning, - 5 as rule_id, - COUNT_BIG(DISTINCT stratum_2) as record_count -FROM @results_database_schema.ACHILLES_results or1 -INNER JOIN @results_database_schema.ACHILLES_analysis oa1 - ON or1.analysis_id = oa1.analysis_id -LEFT JOIN @vocab_database_schema.concept c1 - ON or1.stratum_2 = CAST(c1.concept_id AS VARCHAR) -WHERE or1.analysis_id IN ( - 405, - 605, - 705, - 805 - ) - AND or1.stratum_2 IS NOT NULL - AND c1.concept_id IS NULL -GROUP BY or1.analysis_id, - oa1.analysis_name; - ---ruleid 6 invalid concept_id -INSERT INTO @results_database_schema.ACHILLES_HEEL_results ( - analysis_id, - ACHILLES_HEEL_warning, - rule_id, - record_count - ) -SELECT or1.analysis_id, - 'WARNING: ' + cast(or1.analysis_id as VARCHAR) + '-' + oa1.analysis_name + '; data with unmapped concepts' AS ACHILLES_HEEL_warning, - 6 as rule_id, - null as record_count -FROM @results_database_schema.ACHILLES_results or1 -INNER JOIN @results_database_schema.ACHILLES_analysis oa1 - ON or1.analysis_id = oa1.analysis_id -WHERE or1.analysis_id IN ( - 2, - 4, - 5, - 200, - 301, - 400, - 500, - 505, - 600, - 700, - 800, - 900, - 1000, - 1609, - 1610 - ) - AND or1.stratum_1 = '0' -GROUP BY or1.analysis_id, - oa1.analysis_name; - ---NOT APPLICABLE IN CDMv5 - ---16 disease class - 40 DRG - ---NOT APPLICABLE IN CDMV5 - ---ruleid 17 revenue code - 43 revenue code -INSERT INTO @results_database_schema.ACHILLES_HEEL_results ( - analysis_id, - ACHILLES_HEEL_warning, - rule_id, - record_count - ) -SELECT or1.analysis_id, - 'ERROR: ' + cast(or1.analysis_id as VARCHAR) + '-' + oa1.analysis_name + '; ' + cast(COUNT_BIG(DISTINCT stratum_1) AS VARCHAR) + ' concepts in data are not in correct vocabulary (revenue code)' AS ACHILLES_HEEL_warning, - 17 as rule_id, - COUNT_BIG(DISTINCT stratum_1) as record_count -FROM @results_database_schema.ACHILLES_results or1 -INNER JOIN @results_database_schema.ACHILLES_analysis oa1 - ON or1.analysis_id = oa1.analysis_id -INNER JOIN @vocab_database_schema.concept c1 - ON or1.stratum_1 = CAST(c1.concept_id AS VARCHAR) -WHERE or1.analysis_id IN (1610) - AND or1.stratum_1 IS NOT NULL - AND c1.concept_id <> 0 - AND c1.vocabulary_id NOT IN (0,43) -GROUP BY or1.analysis_id, - oa1.analysis_name; - - ---ruleid 18 ERROR: year of birth in the future -INSERT INTO @results_database_schema.ACHILLES_HEEL_results ( - analysis_id, - ACHILLES_HEEL_warning, - rule_id, - record_count - ) -SELECT DISTINCT or1.analysis_id, - 'ERROR: ' + cast(or1.analysis_id as VARCHAR) + '-' + oa1.analysis_name + '; should not have year of birth in the future, (n=' + cast(sum(or1.count_value) as VARCHAR) + ')' AS ACHILLES_HEEL_warning, - 18 as rule_id, - sum(or1.count_value) as record_count -FROM @results_database_schema.ACHILLES_results or1 -INNER JOIN @results_database_schema.ACHILLES_analysis oa1 - ON or1.analysis_id = oa1.analysis_id -WHERE or1.analysis_id IN (3) - AND CAST(or1.stratum_1 AS INT) > year(getdate()) - AND or1.count_value > 0 -GROUP BY or1.analysis_id, - oa1.analysis_name; - - ---ruleid 19 WARNING: year of birth < 1800 -INSERT INTO @results_database_schema.ACHILLES_HEEL_results ( - analysis_id, - ACHILLES_HEEL_warning, - rule_id, - record_count - ) -SELECT or1.analysis_id, - 'ERROR: ' + cast(or1.analysis_id as VARCHAR) + '-' + oa1.analysis_name + '; should not have 
year of birth < 1800, (n=' + cast(sum(or1.count_value) as VARCHAR) + ')' AS ACHILLES_HEEL_warning, - 19 as rule_id, - sum(or1.count_value) as record_count -FROM @results_database_schema.ACHILLES_results or1 -INNER JOIN @results_database_schema.ACHILLES_analysis oa1 - ON or1.analysis_id = oa1.analysis_id -WHERE or1.analysis_id IN (3) - AND cAST(or1.stratum_1 AS INT) < 1800 - AND or1.count_value > 0 -GROUP BY or1.analysis_id, - oa1.analysis_name; - ---ruleid 20 ERROR: age < 0 -INSERT INTO @results_database_schema.ACHILLES_HEEL_results ( - analysis_id, - ACHILLES_HEEL_warning, - rule_id, - record_count - ) -SELECT or1.analysis_id, - 'ERROR: ' + cast(or1.analysis_id as VARCHAR) + '-' + oa1.analysis_name + '; should not have age < 0, (n=' + cast(sum(or1.count_value) as VARCHAR) + ')' AS ACHILLES_HEEL_warning, - 20 as rule_id, - sum(or1.count_value) as record_count -FROM @results_database_schema.ACHILLES_results or1 -INNER JOIN @results_database_schema.ACHILLES_analysis oa1 - ON or1.analysis_id = oa1.analysis_id -WHERE or1.analysis_id IN (101) - AND CAST(or1.stratum_1 AS INT) < 0 - AND or1.count_value > 0 -GROUP BY or1.analysis_id, - oa1.analysis_name; - ---ruleid 21 ERROR: age > 150 (TODO lower number seems more appropriate) -INSERT INTO @results_database_schema.ACHILLES_HEEL_results ( - analysis_id, - ACHILLES_HEEL_warning, - rule_id, - record_count - ) -SELECT or1.analysis_id, - 'ERROR: ' + cast(or1.analysis_id as VARCHAR) + '-' + oa1.analysis_name + '; should not have age > 150, (n=' + cast(sum(or1.count_value) as VARCHAR) + ')' AS ACHILLES_HEEL_warning, - 21 as rule_id, - sum(or1.count_value) as record_count -FROM @results_database_schema.ACHILLES_results or1 -INNER JOIN @results_database_schema.ACHILLES_analysis oa1 - ON or1.analysis_id = oa1.analysis_id -WHERE or1.analysis_id IN (101) - AND CAST(or1.stratum_1 AS INT) > 150 - AND or1.count_value > 0 -GROUP BY or1.analysis_id, - oa1.analysis_name; - ---ruleid 22 WARNING: monthly change > 100% -INSERT INTO @results_database_schema.ACHILLES_HEEL_results ( - analysis_id, - ACHILLES_HEEL_warning, - rule_id - - ) -SELECT DISTINCT ar1.analysis_id, - 'WARNING: ' + cast(ar1.analysis_id as VARCHAR) + '-' + aa1.analysis_name + '; theres a 100% change in monthly count of events' AS ACHILLES_HEEL_warning, - 22 as rule_id -FROM @results_database_schema.ACHILLES_analysis aa1 -INNER JOIN @results_database_schema.ACHILLES_results ar1 - ON aa1.analysis_id = ar1.analysis_id -INNER JOIN @results_database_schema.ACHILLES_results ar2 - ON ar1.analysis_id = ar2.analysis_id - AND ar1.analysis_id IN ( - 420, - 620, - 720, - 820, - 920, - 1020 - ) -WHERE ( - CAST(ar1.stratum_1 AS INT) + 1 = CAST(ar2.stratum_1 AS INT) - OR CAST(ar1.stratum_1 AS INT) + 89 = CAST(ar2.stratum_1 AS INT) - ) - AND 1.0 * abs(ar2.count_value - ar1.count_value) / ar1.count_value > 1 - AND ar1.count_value > 10; - ---ruleid 23 WARNING: monthly change > 100% at concept level -INSERT INTO @results_database_schema.ACHILLES_HEEL_results ( - analysis_id, - ACHILLES_HEEL_warning, - rule_id, - record_count - ) -SELECT ar1.analysis_id, - 'WARNING: ' + cast(ar1.analysis_id as VARCHAR) + '-' + aa1.analysis_name + '; ' + cast(COUNT_BIG(DISTINCT ar1.stratum_1) AS VARCHAR) + ' concepts have a 100% change in monthly count of events' AS ACHILLES_HEEL_warning, - 23 as rule_id, - COUNT_BIG(DISTINCT ar1.stratum_1) as record_count -FROM @results_database_schema.ACHILLES_analysis aa1 -INNER JOIN @results_database_schema.ACHILLES_results ar1 - ON aa1.analysis_id = ar1.analysis_id -INNER JOIN 
@results_database_schema.ACHILLES_results ar2 - ON ar1.analysis_id = ar2.analysis_id - AND ar1.stratum_1 = ar2.stratum_1 - AND ar1.analysis_id IN ( - 402, - 602, - 702, - 802, - 902, - 1002 - ) -WHERE ( - CAST(ar1.stratum_2 AS INT) + 1 = CAST(ar2.stratum_2 AS INT) - OR CAST(ar1.stratum_2 AS INT) + 89 = CAST(ar2.stratum_2 AS INT) - ) - AND 1.0 * abs(ar2.count_value - ar1.count_value) / ar1.count_value > 1 - AND ar1.count_value > 10 -GROUP BY ar1.analysis_id, - aa1.analysis_name; - ---ruleid 24 WARNING: days_supply > 180 -INSERT INTO @results_database_schema.ACHILLES_HEEL_results ( - analysis_id, - ACHILLES_HEEL_warning, - rule_id, - record_count - ) -SELECT DISTINCT ord1.analysis_id, - 'WARNING: ' + cast(ord1.analysis_id as VARCHAR) + '-' + oa1.analysis_name + ' (count = ' + cast(COUNT_BIG(ord1.max_value) as VARCHAR) + '); max value should not be > 180' AS ACHILLES_HEEL_warning, - 24 as rule_id, - COUNT_BIG(ord1.max_value) as record_count -FROM @results_database_schema.ACHILLES_results_dist ord1 -INNER JOIN @results_database_schema.ACHILLES_analysis oa1 - ON ord1.analysis_id = oa1.analysis_id -WHERE ord1.analysis_id IN (715) - AND ord1.max_value > 180 -GROUP BY ord1.analysis_id, oa1.analysis_name; - ---ruleid 25 WARNING: refills > 10 -INSERT INTO @results_database_schema.ACHILLES_HEEL_results ( - analysis_id, - ACHILLES_HEEL_warning, - rule_id, - record_count - ) -SELECT DISTINCT ord1.analysis_id, - 'WARNING: ' + cast(ord1.analysis_id as VARCHAR) + '-' + oa1.analysis_name + ' (count = ' + cast(COUNT_BIG(ord1.max_value) as VARCHAR) + '); max value should not be > 10' AS ACHILLES_HEEL_warning, - 25 as rule_id, - COUNT_BIG(ord1.max_value) as record_count -FROM @results_database_schema.ACHILLES_results_dist ord1 -INNER JOIN @results_database_schema.ACHILLES_analysis oa1 - ON ord1.analysis_id = oa1.analysis_id -WHERE ord1.analysis_id IN (716) - AND ord1.max_value > 10 -GROUP BY ord1.analysis_id, oa1.analysis_name; - ---ruleid 26 WARNING: quantity > 600 -INSERT INTO @results_database_schema.ACHILLES_HEEL_results ( - analysis_id, - ACHILLES_HEEL_warning, - rule_id, - record_count - ) -SELECT DISTINCT ord1.analysis_id, - 'WARNING: ' + cast(ord1.analysis_id as VARCHAR) + '-' + oa1.analysis_name + ' (count = ' + cast(count(ord1.max_value) as VARCHAR) + '); max value should not be > 600' AS ACHILLES_HEEL_warning, - 26 as rule_id, - count(ord1.max_value) as record_count -FROM @results_database_schema.ACHILLES_results_dist ord1 -INNER JOIN @results_database_schema.ACHILLES_analysis oa1 - ON ord1.analysis_id = oa1.analysis_id -WHERE ord1.analysis_id IN (717) - AND ord1.max_value > 600 -GROUP BY ord1.analysis_id, oa1.analysis_name; - diff --git a/inst/sql/sql_server/AchillesHeel_v5.sql b/inst/sql/sql_server/AchillesHeel_v5.sql deleted file mode 100644 index 48ec2e0d..00000000 --- a/inst/sql/sql_server/AchillesHeel_v5.sql +++ /dev/null @@ -1,1415 +0,0 @@ -/****************************************************************** - -# @file ACHILLESHEEL.SQL -# -# Copyright 2014 Observational Health Data Sciences and Informatics -# -# This file is part of ACHILLES -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# -# @author Observational Health Data Sciences and Informatics - - - - -*******************************************************************/ - - -/******************************************************************* - -Achilles Heel - data quality assessment based on database profiling summary statistics - -SQL for ACHILLES results (for either OMOP CDM v4 or OMOP CDM v5) - - -*******************************************************************/ - -{DEFAULT @cdm_database_schema = 'CDM.dbo'} -{DEFAULT @source_name = 'CDM NAME'} -{DEFAULT @smallcellcount = 5} -{DEFAULT @createTable = TRUE} -{DEFAULT @derivedDataSmPtCount = 11} -{DEFAULT @ThresholdAgeWarning = 125} -{DEFAULT @ThresholdOutpatientVisitPerc = 0.43} -{DEFAULT @ThresholdMinimalPtMeasDxRx = 20.5} - - ---@results_database_schema.ACHILLES_Heel part: - ---prepare the tables first - -IF OBJECT_ID('@results_database_schema.ACHILLES_HEEL_results', 'U') IS NOT NULL - DROP TABLE @results_database_schema.ACHILLES_HEEL_results; - -CREATE TABLE @results_database_schema.ACHILLES_HEEL_results ( - analysis_id INT, - ACHILLES_HEEL_warning VARCHAR(255), - rule_id INT, - record_count BIGINT -); - - ---new part of Heel requires derived tables (per suggestion of Patrick) ---table structure is up for discussion ---per DQI group suggestion: measure_id is made into a string to make derivation ---and sql authoring easy ---computation is quick so the whole table gets wiped every time Heel is executed ---in derived table: analysis_id is not used, look at measure_id instead - - -IF OBJECT_ID('@results_database_schema.ACHILLES_results_derived', 'U') IS NOT NULL - drop table @results_database_schema.ACHILLES_results_derived; - -create table @results_database_schema.ACHILLES_results_derived -( - analysis_id int, - stratum_1 varchar(255), - stratum_2 varchar(255), - statistic_value float, - measure_id varchar(255) -); - - - - - ---general derived measures ---non-CDM sources may generate derived measures directly ---for CDM and Achilles: the fastest way to compute derived measures is to use ---existing measures ---derived measures have IDs over 100 000 (not any more, instead, they use measure_id as their id) - - ---event type derived measures analysis xx05 is often analysis by xx_type ---generate counts for meas type, drug type, proc type, obs type ---optional TODO: possibly rewrite this with CASE statement to better make 705 into drug, 605 into proc ...etc --- in measure_id column (or make that separate sql calls for each category) -with t1(label_id, measure_id) as (select analysis_id as label_id, CAST(CONCAT('ach_',CAST(analysis_id as VARCHAR(10)),':GlobalCnt') AS VARCHAR(100)) as measure_id - from @results_database_schema.achilles_results - where analysis_id in(1805,705,605,805,405)) -insert into @results_database_schema.ACHILLES_results_derived (analysis_id, stratum_1, statistic_value,measure_id) -select - --100000+analysis_id, - NULL as analysis_id, - stratum_2 as stratum_1, - sum(count_value) as statistic_value, - CAST(CONCAT('ach_',CAST(analysis_id as VARCHAR(10)),':GlobalCnt') AS VARCHAR(100)) as measure_id -from t1 inner join @results_database_schema.achilles_results on t1.label_id = analysis_id -group by analysis_id,stratum_2,measure_id; - - - - ---total number of rows per domain ---this derived measure is used for later measure of % of unmapped rows ---this produces a total count of rows in condition table, procedure table etc. 
---used as denominator in later measures - insert into @results_database_schema.ACHILLES_results_derived (statistic_value,measure_id) - select sum(count_value) as statistic_value, - CAST(CONCAT('ach_',CAST(analysis_id as VARCHAR(10)),':GlobalRowCnt') AS VARCHAR(100)) as measure_id - from @results_database_schema.achilles_results - where analysis_id in (401,601,701,801,1801) group by analysis_id - ; - ---concept_0 global row Counts per domain ---this is numerator for percentage value of unmapped rows (per domain) -insert into @results_database_schema.ACHILLES_results_derived (statistic_value,measure_id) - select count_value as statistic_value, - CAST(CONCAT('UnmappedData:ach_',CAST(analysis_id as VARCHAR(10)),':GlobalRowCnt') AS VARCHAR(100)) as measure_id - from @results_database_schema.achilles_results - --TODO:stratum_1 is varchar and this comparison may fail on some db engines - --indeed, mysql got error, changed to a string comparison - where analysis_id in (401,601,701,801,1801) and stratum_1 = '0' - ; - - - ---iris measures by percentage ---for this part, derived table is trying to adopt DQI terminolgy ---and generalize analysis naming scheme (and generalize the DQ rules) - -insert into @results_database_schema.ACHILLES_results_derived (statistic_value,measure_id) -select - CAST(100.0*count_value/statistic.total_pts AS FLOAT) as statistic_value, - CAST(CONCAT('ach_',CAST(analysis_id as VARCHAR(10)),':Percentage') AS VARCHAR(100)) as measure_id - from @results_database_schema.achilles_results - cross join (SELECT count_value as total_pts from @results_database_schema.achilles_results r where analysis_id =1) as statistic - where analysis_id in (2000,2001,2002,2003); - - - -insert into @results_database_schema.ACHILLES_results_derived (statistic_value,measure_id) - select sum(count_value) as statistic_value, - CAST('Visit:InstanceCnt' AS VARCHAR(255)) as measure_id - from @results_database_schema.achilles_results where analysis_id = 201; - - --in dist analysis/measure 203 - a number similar to that is computed above but it is on person level - - ---age at first observation by decile -insert into @results_database_schema.ACHILLES_results_derived (stratum_1,statistic_value,measure_id) -select cast(floor(cast(stratum_1 as int)/10) as varchar(10)), - sum(count_value) as statistic_value, - CAST('AgeAtFirstObsByDecile:PersonCnt' AS VARCHAR(255)) as measure_id - from @results_database_schema.achilles_results where analysis_id = 101 -group by floor(cast(stratum_1 as int)/10); - ---count whether all deciles from 0 to 8 are there (has later a rule: if less the threshold, issue notification) -insert into @results_database_schema.ACHILLES_results_derived (statistic_value,measure_id) -select count(*) as statistic_value, - CAST('AgeAtFirstObsByDecile:DecileCnt' AS VARCHAR(255)) as measure_id -from @results_database_schema.ACHILLES_results_derived -where measure_id = 'AgeAtFirstObsByDecile:PersonCnt' -and cast(stratum_1 as int) <=8; - - ---data density measures - -insert into @results_database_schema.ACHILLES_results_derived (statistic_value,measure_id) -select count(*) as statistic_value, CAST('DrugExposure:ConceptCnt' AS VARCHAR(255)) as measure_id -from @results_database_schema.ACHILLES_results where analysis_id = 701; - -insert into @results_database_schema.ACHILLES_results_derived (statistic_value,measure_id) -select count(*) as statistic_value, CAST('DrugEra:ConceptCnt' AS VARCHAR(255)) as measure_id -from @results_database_schema.ACHILLES_results where analysis_id = 901; - -insert into 
@results_database_schema.ACHILLES_results_derived (statistic_value,measure_id) -select count(*) as statistic_value, CAST('Condition:ConceptCnt' AS VARCHAR(255)) as measure_id -from @results_database_schema.ACHILLES_results where analysis_id = 401; - -insert into @results_database_schema.ACHILLES_results_derived (statistic_value,measure_id) -select count(*) as statistic_value, CAST('Procedure:ConceptCnt' AS VARCHAR(255)) as measure_id -from @results_database_schema.ACHILLES_results where analysis_id = 601; - -insert into @results_database_schema.ACHILLES_results_derived (statistic_value,measure_id) -select count(*) as statistic_value, CAST('Observation:ConceptCnt' AS VARCHAR(255)) as measure_id -from @results_database_schema.ACHILLES_results where analysis_id = 801; - -insert into @results_database_schema.ACHILLES_results_derived (statistic_value,measure_id) -select count(*) as statistic_value, CAST('Measurement:ConceptCnt' AS VARCHAR(255)) as measure_id -from @results_database_schema.ACHILLES_results where analysis_id = 1801; - -insert into @results_database_schema.ACHILLES_results_derived (statistic_value,measure_id) -select count(*) as statistic_value, CAST('Visit:ConceptCnt' AS VARCHAR(255)) as measure_id -from @results_database_schema.ACHILLES_results where analysis_id = 201; - -insert into @results_database_schema.ACHILLES_results_derived (statistic_value,measure_id) -select count(*) as statistic_value, CAST('Death:DeathType:ConceptCnt' AS VARCHAR(255)) as measure_id -from @results_database_schema.ACHILLES_results where analysis_id = 505; - -insert into @results_database_schema.ACHILLES_results_derived (statistic_value,measure_id) -select count(*) as statistic_value, CAST('Death:DeathCause:ConceptCnt' AS VARCHAR(255)) as measure_id -from @results_database_schema.ACHILLES_results where analysis_id = 501; - -insert into @results_database_schema.ACHILLES_results_derived (statistic_value,measure_id) -select count(*) as statistic_value, CAST('Person:Race:ConceptCnt' AS VARCHAR(255)) as measure_id -from @results_database_schema.ACHILLES_results where analysis_id = 4; - -insert into @results_database_schema.ACHILLES_results_derived (statistic_value,measure_id) -select count(*) as statistic_value, CAST('Person:Ethnicity:ConceptCnt' AS VARCHAR(255)) as measure_id -from @results_database_schema.ACHILLES_results where analysis_id = 5; - - -insert into @results_database_schema.ACHILLES_results_derived (statistic_value,measure_id) -select count(*) as statistic_value, CAST('Device:ConceptCnt' AS VARCHAR(255)) as measure_id -from @results_database_schema.ACHILLES_results where analysis_id = 2101; - -insert into @results_database_schema.ACHILLES_results_derived (statistic_value,measure_id) -select count(*) as statistic_value, CAST('Note:ConceptCnt' AS VARCHAR(255)) as measure_id -from @results_database_schema.ACHILLES_results where analysis_id = 2201; - ---unmapped data (concept_0) derived measures (focusing on source values) - -insert into @results_database_schema.ACHILLES_results_derived (stratum_1,statistic_value,measure_id) -select stratum_1, -count(*) as statistic_value, -CAST('UnmappedDataByDomain:SourceValueCnt' AS VARCHAR(255)) as measure_id -from @results_database_schema.achilles_results where analysis_id = 1900 group by stratum_1; - - ---count of specialties in the provider table ---(subsequent rule can check if this count is > trehshold) (general population dataset only)) -insert into @results_database_schema.ACHILLES_results_derived (statistic_value,measure_id) -select -count(*) as 
statistic_value, -CAST('Provider:SpeciatlyCnt' AS VARCHAR(255)) as measure_id -from @results_database_schema.achilles_results where analysis_id = 301; - - - ---derived data that are safe to share (greater aggregation and small patient count discarded at query level) --- in derived result table; not at the end of the script - - -insert into @results_database_schema.ACHILLES_results_derived (stratum_1,statistic_value,measure_id) -select CAST(decade AS VARCHAR(255)) as stratum_1,temp_cnt as statistic_value, -CAST('Death:byDecade:SafePatientCnt' AS VARCHAR(255)) as measure_id -from - (select left(stratum_1,3) as decade,sum(count_value) as temp_cnt from @results_database_schema.achilles_results where analysis_id = 504 group by left(stratum_1,3) - )a -where temp_cnt >= @derivedDataSmPtCount; - - - -insert into @results_database_schema.ACHILLES_results_derived (stratum_1,statistic_value,measure_id) -select stratum_1,temp_cnt as statistic_value, -CAST('Death:byYear:SafePatientCnt' AS VARCHAR(255)) as measure_id -from - (select stratum_1,sum(count_value) as temp_cnt from @results_database_schema.achilles_results where analysis_id = 504 group by stratum_1 - )a -where temp_cnt >= @derivedDataSmPtCount; - - - ---more aggregated view of visit type by decile (derived from analysis_id 204) ---denominator calculation will be replaced with new measure 212 in next version - -insert into @results_database_schema.ACHILLES_results_derived (stratum_1,stratum_2,statistic_value,measure_id) -select a.stratum_1, - a.stratum_4 as stratum_2, - CAST(1.0*a.person_cnt/b.population_size AS FLOAT) as statistic_value, -CAST('Visit:Type:PersonWithAtLeastOne:byDecile:Percentage' AS VARCHAR(255)) as measure_id -from -(select stratum_1, stratum_4, sum(count_value) as person_cnt from @results_database_schema.achilles_results where analysis_id = 204 group by stratum_1, stratum_4) a -inner join -(select stratum_4, sum(count_value) as population_size from @results_database_schema.achilles_results where analysis_id = 204 group by stratum_4) b -on a.stratum_4=b.stratum_4 -where a.person_cnt >= @derivedDataSmPtCount; - - ---size of Achilles Metadata -insert into @results_database_schema.ACHILLES_results_derived (stratum_1,statistic_value,measure_id) -select CAST(analysis_id AS VARCHAR(255)) as stratum_1,COUNT_BIG(*) as statistic_value, -CAST('Achilles:byAnalysis:RowCnt' AS VARCHAR(255)) as measure_id -from @results_database_schema.achilles_results group by analysis_id -; - - ---General Population Only: ratio of born to deceased (indicates missing birth or death events) stratified by year -insert into @results_database_schema.ACHILLES_results_derived (stratum_1,statistic_value,measure_id) -select a.stratum_1, - CAST(1.0*a.born_cnt/b.died_cnt AS FLOAT) as statistic_value, - CAST('Death:BornDeceasedRatio' AS VARCHAR(255)) as measure_id -from (select stratum_1,count_value as born_cnt from @results_database_schema.achilles_results where analysis_id = 3) a -inner join -(select stratum_1, count(count_value) as died_cnt from @results_database_schema.achilles_results where analysis_id = 504 group by stratum_1) b -on a.stratum_1 = b.stratum_1 -where b.died_cnt > 0 -; - - - ---end of derived general measures ******************************************************************** - - - - - - - ---actual Heel rules start from here ***************************************** - - - - - - - ---Some rules check conformance to the CDM model, other rules look at data quality - - ---ruleid 1 check for non-zero counts from checks of improper data (invalid 
ids, out-of-bound data, inconsistent dates) -INSERT INTO @results_database_schema.ACHILLES_HEEL_results ( - analysis_id, - ACHILLES_HEEL_warning, - rule_id, - record_count - ) -SELECT DISTINCT or1.analysis_id, - CAST(CONCAT('ERROR: ', cast(or1.analysis_id as VARCHAR(10)), '-', oa1.analysis_name, '; count (n=', cast(or1.count_value as VARCHAR(19)), ') should not be > 0') AS VARCHAR(255)) AS ACHILLES_HEEL_warning, - 1 as rule_id, - or1.count_value -FROM @results_database_schema.ACHILLES_results or1 -INNER JOIN @results_database_schema.ACHILLES_analysis oa1 - ON or1.analysis_id = oa1.analysis_id -WHERE or1.analysis_id IN ( - 7, - 8, - 9, - 114, - 115, - 118, - 207, - 208, - 209, - 210, - 302, - 409, - 410, - 411, - 412, - 413, - 509, - --510, taken out from this umbrella rule and implemented separately - 609, - 610, - 612, - 613, - 709, - 710, - 711, - 712, - 713, - 809, - 810, - 812, - 813, - 814, - 908, - 909, - 910, - 1008, - 1009, - 1010, - 1415, - 1500, - 1501, - 1600, - 1601, - 1701 - ) --all explicit counts of data anamolies - AND or1.count_value > 0; - ---ruleid 2 distributions where min should not be negative -INSERT INTO @results_database_schema.ACHILLES_HEEL_results ( - analysis_id, - ACHILLES_HEEL_warning, - rule_id, - record_count - ) -SELECT ord1.analysis_id, - CAST(CONCAT('ERROR: ', cast(ord1.analysis_id as VARCHAR(10)), ' - ', oa1.analysis_name, ' (count = ', cast(COUNT_BIG(ord1.min_value) as VARCHAR(19)), '); min value should not be negative') AS VARCHAR(255)) AS ACHILLES_HEEL_warning, - 2 as rule_id, - COUNT_BIG(ord1.min_value) as record_count -FROM @results_database_schema.ACHILLES_results_dist ord1 -INNER JOIN @results_database_schema.ACHILLES_analysis oa1 - ON ord1.analysis_id = oa1.analysis_id -WHERE ord1.analysis_id IN ( - 103, - 105, - 206, - 406, - 506, - 606, - 706, - 715, - 716, - 717, - 806, - 906, - 907, - 1006, - 1007, - 1502, - 1503, - 1504, - 1505, - 1506, - 1507, - 1508, - 1509, - 1510, - 1511, - 1602, - 1603, - 1604, - 1605, - 1606, - 1607, - 1608 - ) - AND ord1.min_value < 0 - GROUP BY ord1.analysis_id, oa1.analysis_name; - ---ruleid 3 death distributions where max should not be positive -INSERT INTO @results_database_schema.ACHILLES_HEEL_results ( - analysis_id, - ACHILLES_HEEL_warning, - rule_id, - record_count -) -SELECT ord1.analysis_id, - CAST(CONCAT('WARNING: ', cast(ord1.analysis_id as VARCHAR(10)), '-', oa1.analysis_name, ' (count = ', cast(COUNT_BIG(ord1.max_value) as VARCHAR(19)), '); max value should not be positive, otherwise its a zombie with data >1mo after death ') AS VARCHAR(255)) AS ACHILLES_HEEL_warning, - 3 as rule_id, - COUNT_BIG(ord1.max_value) as record_count -FROM @results_database_schema.ACHILLES_results_dist ord1 -INNER JOIN @results_database_schema.ACHILLES_analysis oa1 - ON ord1.analysis_id = oa1.analysis_id -WHERE ord1.analysis_id IN ( - 511, - 512, - 513, - 514, - 515 - ) - AND ord1.max_value > 30 -GROUP BY ord1.analysis_id, oa1.analysis_name; - ---ruleid 4 CDM-conformance rule: invalid concept_id -INSERT INTO @results_database_schema.ACHILLES_HEEL_results ( - analysis_id, - ACHILLES_HEEL_warning, - rule_id, - record_count -) -SELECT or1.analysis_id, - CAST(CONCAT('ERROR: ', cast(or1.analysis_id as VARCHAR(10)), '-', oa1.analysis_name, '; ', cast(COUNT_BIG(DISTINCT stratum_1) AS VARCHAR(19)), ' concepts in data are not in vocabulary') AS VARCHAR(255)) AS ACHILLES_HEEL_warning, - 4 as rule_id, - COUNT_BIG(DISTINCT stratum_1) as record_count -FROM @results_database_schema.ACHILLES_results or1 -INNER JOIN 
@results_database_schema.ACHILLES_analysis oa1 - ON or1.analysis_id = oa1.analysis_id -LEFT JOIN @vocab_database_schema.concept c1 - ON or1.stratum_1 = CAST(c1.concept_id AS VARCHAR(19)) -WHERE or1.analysis_id IN ( - 2, - 4, - 5, - 200, - 301, - 400, - 500, - 505, - 600, - 700, - 800, - 900, - 1000, - 1609, - 1610 - ) - AND or1.stratum_1 IS NOT NULL - AND c1.concept_id IS NULL -GROUP BY or1.analysis_id, - oa1.analysis_name; - ---ruleid 5 CDM-conformance rule:invalid type concept_id ---this rule is only checking that the concept is valid (joins to concept table at all) ---it does not check the vocabulary_id to further restrict the scope of the valid concepts ---to only include,for example, death types -INSERT INTO @results_database_schema.ACHILLES_HEEL_results ( - analysis_id, - ACHILLES_HEEL_warning, - rule_id, - record_count - ) -SELECT or1.analysis_id, - CAST(CONCAT('ERROR: ', cast(or1.analysis_id as VARCHAR(10)), '-', oa1.analysis_name, '; ', cast(COUNT_BIG(DISTINCT stratum_2) AS VARCHAR(19)), ' concepts in data are not in vocabulary') AS VARCHAR(255)) AS ACHILLES_HEEL_warning, - 5 as rule_id, - COUNT_BIG(DISTINCT stratum_2) as record_count -FROM @results_database_schema.ACHILLES_results or1 -INNER JOIN @results_database_schema.ACHILLES_analysis oa1 - ON or1.analysis_id = oa1.analysis_id -LEFT JOIN @vocab_database_schema.concept c1 - ON or1.stratum_2 = CAST(c1.concept_id AS VARCHAR(19)) -WHERE or1.analysis_id IN ( - 405, - 605, - 705, - 805, - 1805 - ) - AND or1.stratum_2 IS NOT NULL - AND c1.concept_id IS NULL -GROUP BY or1.analysis_id, - oa1.analysis_name; - ---ruleid 6 CDM-conformance rule:invalid concept_id -INSERT INTO @results_database_schema.ACHILLES_HEEL_results ( - analysis_id, - ACHILLES_HEEL_warning, - rule_id, - record_count - ) -SELECT or1.analysis_id, - CAST(CONCAT('WARNING: ', cast(or1.analysis_id as VARCHAR(10)), '-', oa1.analysis_name, '; data with unmapped concepts') AS VARCHAR(255)) AS ACHILLES_HEEL_warning, - 6 as rule_id, - null as record_count -FROM @results_database_schema.ACHILLES_results or1 -INNER JOIN @results_database_schema.ACHILLES_analysis oa1 - ON or1.analysis_id = oa1.analysis_id -WHERE or1.analysis_id IN ( - 2, - 4, - 5, - 200, - 301, - 400, - 500, - 505, - 600, - 700, - 800, - 900, - 1000, - 1609, - 1610 - ) - AND or1.stratum_1 = '0' -GROUP BY or1.analysis_id, - oa1.analysis_name; - ---concept from the wrong vocabulary ---ruleid 7 CDM-conformance rule:gender - 12 HL7 -INSERT INTO @results_database_schema.ACHILLES_HEEL_results ( - analysis_id, - ACHILLES_HEEL_warning, - rule_id, - record_count - ) -SELECT or1.analysis_id, - CAST(CONCAT('ERROR: ', cast(or1.analysis_id as VARCHAR(10)), '-', oa1.analysis_name, '; ', cast(COUNT_BIG(DISTINCT stratum_1) AS VARCHAR(19)), ' concepts in data are not in correct vocabulary') AS VARCHAR(255)) AS ACHILLES_HEEL_warning, - 7 as rule_id, - COUNT_BIG(DISTINCT stratum_1) as record_count -FROM @results_database_schema.ACHILLES_results or1 -INNER JOIN @results_database_schema.ACHILLES_analysis oa1 - ON or1.analysis_id = oa1.analysis_id -INNER JOIN @vocab_database_schema.concept c1 - ON or1.stratum_1 = CAST(c1.concept_id AS VARCHAR(19)) -WHERE or1.analysis_id IN (2) - AND or1.stratum_1 IS NOT NULL - AND c1.concept_id <> 0 - AND lower(c1.domain_id) NOT IN ('gender') -GROUP BY or1.analysis_id, - oa1.analysis_name; - ---ruleid 8 race - 13 CDC Race -INSERT INTO @results_database_schema.ACHILLES_HEEL_results ( - analysis_id, - ACHILLES_HEEL_warning, - rule_id, - record_count - ) -SELECT or1.analysis_id, - CAST(CONCAT('ERROR: ', 
cast(or1.analysis_id as VARCHAR(10)), '-', oa1.analysis_name, '; ', cast(COUNT_BIG(DISTINCT stratum_1) AS VARCHAR(19)), ' concepts in data are not in correct vocabulary') AS VARCHAR(255)) AS ACHILLES_HEEL_warning, - 8 as rule_id, - COUNT_BIG(DISTINCT stratum_1) as record_count -FROM @results_database_schema.ACHILLES_results or1 -INNER JOIN @results_database_schema.ACHILLES_analysis oa1 - ON or1.analysis_id = oa1.analysis_id -INNER JOIN @vocab_database_schema.concept c1 - ON or1.stratum_1 = CAST(c1.concept_id AS VARCHAR(19)) -WHERE or1.analysis_id IN (4) - AND or1.stratum_1 IS NOT NULL - AND c1.concept_id <> 0 - AND lower(c1.domain_id) NOT IN ('race') -GROUP BY or1.analysis_id, - oa1.analysis_name; - ---ruleid 9 ethnicity - 44 ethnicity -INSERT INTO @results_database_schema.ACHILLES_HEEL_results ( - analysis_id, - ACHILLES_HEEL_warning, - rule_id, - record_count - ) -SELECT or1.analysis_id, - CAST(CONCAT('ERROR: ', cast(or1.analysis_id as VARCHAR(10)), '-', oa1.analysis_name, '; ', cast(COUNT_BIG(DISTINCT stratum_1) AS VARCHAR(19)), ' concepts in data are not in correct vocabulary (CMS Ethnicity)') AS VARCHAR(255)) AS ACHILLES_HEEL_warning, - 9 as rule_id, - COUNT_BIG(DISTINCT stratum_1) as record_count -FROM @results_database_schema.ACHILLES_results or1 -INNER JOIN @results_database_schema.ACHILLES_analysis oa1 - ON or1.analysis_id = oa1.analysis_id -INNER JOIN @vocab_database_schema.concept c1 - ON or1.stratum_1 = CAST(c1.concept_id AS VARCHAR(19)) -WHERE or1.analysis_id IN (5) - AND or1.stratum_1 IS NOT NULL - AND c1.concept_id <> 0 - AND lower(c1.domain_id) NOT IN ('ethnicity') -GROUP BY or1.analysis_id, - oa1.analysis_name; - ---ruleid 10 place of service - 14 CMS place of service, 24 OMOP visit -INSERT INTO @results_database_schema.ACHILLES_HEEL_results ( - analysis_id, - ACHILLES_HEEL_warning, - rule_id, - record_count - ) -SELECT or1.analysis_id, - CAST(CONCAT('ERROR: ', cast(or1.analysis_id as VARCHAR(10)), '-', oa1.analysis_name, '; ', cast(COUNT_BIG(DISTINCT stratum_1) AS VARCHAR(19)), ' concepts in data are not in correct vocabulary') AS VARCHAR(255)) AS ACHILLES_HEEL_warning, - 10 as rule_id, - COUNT_BIG(DISTINCT stratum_1) as record_count -FROM @results_database_schema.ACHILLES_results or1 -INNER JOIN @results_database_schema.ACHILLES_analysis oa1 - ON or1.analysis_id = oa1.analysis_id -INNER JOIN @vocab_database_schema.concept c1 - ON or1.stratum_1 = CAST(c1.concept_id AS VARCHAR(19)) -WHERE or1.analysis_id IN (202) - AND or1.stratum_1 IS NOT NULL - AND c1.concept_id <> 0 - AND lower(c1.domain_id) NOT IN ('visit') -GROUP BY or1.analysis_id, - oa1.analysis_name; - ---ruleid 11 CDM-conformance rule:specialty - 48 specialty -INSERT INTO @results_database_schema.ACHILLES_HEEL_results ( - analysis_id, - ACHILLES_HEEL_warning, - rule_id, - record_count - ) -SELECT or1.analysis_id, - CAST(CONCAT('ERROR: ', cast(or1.analysis_id as VARCHAR(10)), '-', oa1.analysis_name, '; ', cast(COUNT_BIG(DISTINCT stratum_1) AS VARCHAR(19)), ' concepts in data are not in correct vocabulary (Specialty)') AS VARCHAR(255)) AS ACHILLES_HEEL_warning, - 11 as rule_id, - COUNT_BIG(DISTINCT stratum_1) as record_count -FROM @results_database_schema.ACHILLES_results or1 -INNER JOIN @results_database_schema.ACHILLES_analysis oa1 - ON or1.analysis_id = oa1.analysis_id -INNER JOIN @vocab_database_schema.concept c1 - ON or1.stratum_1 = CAST(c1.concept_id AS VARCHAR(19)) -WHERE or1.analysis_id IN (301) - AND or1.stratum_1 IS NOT NULL - AND c1.concept_id <> 0 - AND lower(c1.domain_id) NOT IN ('provider specialty') 
-GROUP BY or1.analysis_id, - oa1.analysis_name; - ---ruleid 12 condition occurrence, era - 1 SNOMED -INSERT INTO @results_database_schema.ACHILLES_HEEL_results ( - analysis_id, - ACHILLES_HEEL_warning, - rule_id, - record_count - ) -SELECT or1.analysis_id, - CAST(CONCAT('ERROR: ', cast(or1.analysis_id as VARCHAR(10)), '-', oa1.analysis_name, '; ', cast(COUNT_BIG(DISTINCT stratum_1) AS VARCHAR(19)), ' concepts in data are not in correct vocabulary') AS VARCHAR(255)) AS ACHILLES_HEEL_warning, - 12 as rule_id, - COUNT_BIG(DISTINCT stratum_1) as record_count -FROM @results_database_schema.ACHILLES_results or1 -INNER JOIN @results_database_schema.ACHILLES_analysis oa1 - ON or1.analysis_id = oa1.analysis_id -INNER JOIN @vocab_database_schema.concept c1 - ON or1.stratum_1 = CAST(c1.concept_id AS VARCHAR(19)) -WHERE or1.analysis_id IN ( - 400, - 1000 - ) - AND or1.stratum_1 IS NOT NULL - AND c1.concept_id <> 0 - AND lower(c1.domain_id) NOT IN ('condition','condition/drug', 'condition/meas', 'condition/obs', 'condition/procedure') -GROUP BY or1.analysis_id, - oa1.analysis_name; - ---ruleid 13 drug exposure - 8 RxNorm -INSERT INTO @results_database_schema.ACHILLES_HEEL_results ( - analysis_id, - ACHILLES_HEEL_warning, - rule_id, - record_count - ) -SELECT or1.analysis_id, - CAST(CONCAT('ERROR: ', cast(or1.analysis_id as VARCHAR(10)), '-', oa1.analysis_name, '; ', cast(COUNT_BIG(DISTINCT stratum_1) AS VARCHAR(19)), ' concepts in data are not in correct vocabulary') AS VARCHAR(255)) AS ACHILLES_HEEL_warning, - 13 as rule_id, - COUNT_BIG(DISTINCT stratum_1) as record_count -FROM @results_database_schema.ACHILLES_results or1 -INNER JOIN @results_database_schema.ACHILLES_analysis oa1 - ON or1.analysis_id = oa1.analysis_id -INNER JOIN @vocab_database_schema.concept c1 - ON or1.stratum_1 = CAST(c1.concept_id AS VARCHAR(19)) -WHERE or1.analysis_id IN ( - 700, - 900 - ) - AND or1.stratum_1 IS NOT NULL - AND c1.concept_id <> 0 - AND lower(c1.domain_id) NOT IN ('drug','condition/drug', 'device/drug', 'drug/measurement', 'drug/obs', 'drug/procedure') -GROUP BY or1.analysis_id, - oa1.analysis_name; - ---ruleid 14 procedure - 4 CPT4/5 HCPCS/3 ICD9P -INSERT INTO @results_database_schema.ACHILLES_HEEL_results ( - analysis_id, - ACHILLES_HEEL_warning, - rule_id, - record_count - ) -SELECT or1.analysis_id, - CAST(CONCAT('ERROR: ', cast(or1.analysis_id as VARCHAR(10)), '-', oa1.analysis_name, '; ', cast(COUNT_BIG(DISTINCT stratum_1) AS VARCHAR(19)), ' concepts in data are not in correct vocabulary') AS VARCHAR(255)) AS ACHILLES_HEEL_warning, - 14 as rule_id, - COUNT_BIG(DISTINCT stratum_1) as record_count -FROM @results_database_schema.ACHILLES_results or1 -INNER JOIN @results_database_schema.ACHILLES_analysis oa1 - ON or1.analysis_id = oa1.analysis_id -INNER JOIN @vocab_database_schema.concept c1 - ON or1.stratum_1 = CAST(c1.concept_id AS VARCHAR(19)) -WHERE or1.analysis_id IN (600) - AND or1.stratum_1 IS NOT NULL - AND c1.concept_id <> 0 - AND lower(c1.domain_id) NOT IN ('procedure','condition/procedure', 'device/procedure', 'drug/procedure', 'obs/procedure') -GROUP BY or1.analysis_id, - oa1.analysis_name; - ---15 observation - 6 LOINC - ---NOT APPLICABLE IN CDMv5 - - ---16 disease class - 40 DRG - ---NOT APPLICABLE IN CDMV5 - ---ruleid 17 revenue code - 43 revenue code -INSERT INTO @results_database_schema.ACHILLES_HEEL_results ( - analysis_id, - ACHILLES_HEEL_warning, - rule_id, - record_count - ) -SELECT or1.analysis_id, - CAST(CONCAT('ERROR: ', cast(or1.analysis_id as VARCHAR(10)), '-', oa1.analysis_name, '; ', 
cast(COUNT_BIG(DISTINCT stratum_1) AS VARCHAR(19)), ' concepts in data are not in correct vocabulary (revenue code)') AS VARCHAR(255)) AS ACHILLES_HEEL_warning, - 17 as rule_id, - COUNT_BIG(DISTINCT stratum_1) as record_count -FROM @results_database_schema.ACHILLES_results or1 -INNER JOIN @results_database_schema.ACHILLES_analysis oa1 - ON or1.analysis_id = oa1.analysis_id -INNER JOIN @vocab_database_schema.concept c1 - ON or1.stratum_1 = CAST(c1.concept_id AS VARCHAR(19)) -WHERE or1.analysis_id IN (1610) - AND or1.stratum_1 IS NOT NULL - AND c1.concept_id <> 0 - AND lower(c1.domain_id) NOT IN ('revenue code') -GROUP BY or1.analysis_id, - oa1.analysis_name; - - ---ruleid 18 ERROR: year of birth in the future -INSERT INTO @results_database_schema.ACHILLES_HEEL_results ( - analysis_id, - ACHILLES_HEEL_warning, - rule_id, - record_count - ) -SELECT or1.analysis_id, - CAST(CONCAT('ERROR: ', cast(or1.analysis_id as VARCHAR(10)), '-', oa1.analysis_name, '; should not have year of birth in the future, (n=', cast(sum(or1.count_value) as VARCHAR(19)), ')') AS VARCHAR(255)) AS ACHILLES_HEEL_warning, - 18 as rule_id, - sum(or1.count_value) as record_count -FROM @results_database_schema.ACHILLES_results or1 -INNER JOIN @results_database_schema.ACHILLES_analysis oa1 - ON or1.analysis_id = oa1.analysis_id -WHERE or1.analysis_id IN (3) - AND CAST(or1.stratum_1 AS INT) > year(getdate()) - AND or1.count_value > 0 -GROUP BY or1.analysis_id, - oa1.analysis_name; - - ---ruleid 19 WARNING: year of birth < 1800 -INSERT INTO @results_database_schema.ACHILLES_HEEL_results ( - analysis_id, - ACHILLES_HEEL_warning, - rule_id, - record_count - ) -SELECT or1.analysis_id, - CAST(CONCAT('ERROR: ', cast(or1.analysis_id as VARCHAR(10)), '-', oa1.analysis_name, '; should not have year of birth < 1800, (n=', cast(sum(or1.count_value) as VARCHAR(19)), ')') AS VARCHAR(255)) AS ACHILLES_HEEL_warning, - 19 as rule_id, - sum(or1.count_value) as record_count -FROM @results_database_schema.ACHILLES_results or1 -INNER JOIN @results_database_schema.ACHILLES_analysis oa1 - ON or1.analysis_id = oa1.analysis_id -WHERE or1.analysis_id IN (3) - AND cAST(or1.stratum_1 AS INT) < 1800 - AND or1.count_value > 0 -GROUP BY or1.analysis_id, - oa1.analysis_name; - ---ruleid 20 ERROR: age < 0 -INSERT INTO @results_database_schema.ACHILLES_HEEL_results ( - analysis_id, - ACHILLES_HEEL_warning, - rule_id, - record_count - ) -SELECT or1.analysis_id, - CAST(CONCAT('ERROR: ', cast(or1.analysis_id as VARCHAR(10)), '-', oa1.analysis_name, '; should not have age < 0, (n=', cast(sum(or1.count_value) as VARCHAR(19)), ')') AS VARCHAR(255)) AS ACHILLES_HEEL_warning, - 20 as rule_id, - sum(or1.count_value) as record_count -FROM @results_database_schema.ACHILLES_results or1 -INNER JOIN @results_database_schema.ACHILLES_analysis oa1 - ON or1.analysis_id = oa1.analysis_id -WHERE or1.analysis_id IN (101) - AND CAST(or1.stratum_1 AS INT) < 0 - AND or1.count_value > 0 -GROUP BY or1.analysis_id, - oa1.analysis_name; - ---ruleid 21 ERROR: age > 150 (TODO lower number seems more appropriate) -INSERT INTO @results_database_schema.ACHILLES_HEEL_results ( - analysis_id, - ACHILLES_HEEL_warning, - rule_id, - record_count - ) -SELECT or1.analysis_id, - CAST(CONCAT('ERROR: ', cast(or1.analysis_id as VARCHAR(10)), '-', oa1.analysis_name, '; should not have age > 150, (n=', cast(sum(or1.count_value) as VARCHAR(19)), ')') AS VARCHAR(255)) AS ACHILLES_HEEL_warning, - 21 as rule_id, - sum(or1.count_value) as record_count -FROM @results_database_schema.ACHILLES_results or1 
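-- Illustrative sketch, for orientation only: rules 18 and 19 read analysis 3 (number of
-- persons by year of birth), where stratum_1 holds the birth year as text. The implausible
-- years behind both rules can be listed directly:
SELECT CAST(or1.stratum_1 AS INT) AS year_of_birth,
  or1.count_value AS person_count
FROM @results_database_schema.ACHILLES_results or1
WHERE or1.analysis_id = 3
  AND (CAST(or1.stratum_1 AS INT) > year(getdate())
    OR CAST(or1.stratum_1 AS INT) < 1800)
  AND or1.count_value > 0;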
-INNER JOIN @results_database_schema.ACHILLES_analysis oa1 - ON or1.analysis_id = oa1.analysis_id -WHERE or1.analysis_id IN (101) - AND CAST(or1.stratum_1 AS INT) > 150 - AND or1.count_value > 0 -GROUP BY or1.analysis_id, - oa1.analysis_name; - ---ruleid 22 WARNING: monthly change > 100% -INSERT INTO @results_database_schema.ACHILLES_HEEL_results ( - analysis_id, - ACHILLES_HEEL_warning, - rule_id - - ) -SELECT DISTINCT ar1.analysis_id, - CAST(CONCAT('WARNING: ', cast(ar1.analysis_id as VARCHAR(10)), '-', aa1.analysis_name, '; theres a 100% change in monthly count of events') AS VARCHAR(255)) AS ACHILLES_HEEL_warning, - 22 as rule_id - -FROM @results_database_schema.ACHILLES_analysis aa1 -INNER JOIN @results_database_schema.ACHILLES_results ar1 - ON aa1.analysis_id = ar1.analysis_id -INNER JOIN @results_database_schema.ACHILLES_results ar2 - ON ar1.analysis_id = ar2.analysis_id - AND ar1.analysis_id IN ( - 420, - 620, - 720, - 820, - 920, - 1020 - ) -WHERE ( - CAST(ar1.stratum_1 AS INT) + 1 = CAST(ar2.stratum_1 AS INT) - OR CAST(ar1.stratum_1 AS INT) + 89 = CAST(ar2.stratum_1 AS INT) - ) - AND 1.0 * abs(ar2.count_value - ar1.count_value) / ar1.count_value > 1 - AND ar1.count_value > 10; - ---ruleid 23 WARNING: monthly change > 100% at concept level -INSERT INTO @results_database_schema.ACHILLES_HEEL_results ( - analysis_id, - ACHILLES_HEEL_warning, - rule_id, - record_count - ) -SELECT ar1.analysis_id, - CAST(CONCAT('WARNING: ', cast(ar1.analysis_id as VARCHAR(10)), '-', aa1.analysis_name, '; ', cast(COUNT_BIG(DISTINCT ar1.stratum_1) AS VARCHAR(19)), ' concepts have a 100% change in monthly count of events') AS VARCHAR(255)) AS ACHILLES_HEEL_warning, - 23 as rule_id, - COUNT_BIG(DISTINCT ar1.stratum_1) as record_count -FROM @results_database_schema.ACHILLES_analysis aa1 -INNER JOIN @results_database_schema.ACHILLES_results ar1 - ON aa1.analysis_id = ar1.analysis_id -INNER JOIN @results_database_schema.ACHILLES_results ar2 - ON ar1.analysis_id = ar2.analysis_id - AND ar1.stratum_1 = ar2.stratum_1 - AND ar1.analysis_id IN ( - 402, - 602, - 702, - 802, - 902, - 1002 - ) -WHERE ( - ROUND(CAST(ar1.stratum_2 AS DECIMAL(18,4)),0) + 1 = ROUND(CAST(ar2.stratum_2 AS DECIMAL(18,4)),0) - OR ROUND(CAST(ar1.stratum_2 AS DECIMAL(18,4)),0) + 89 = ROUND(CAST(ar2.stratum_2 AS DECIMAL(18,4)),0) - ) - AND 1.0 * abs(ar2.count_value - ar1.count_value) / ar1.count_value > 1 - AND ar1.count_value > 10 -GROUP BY ar1.analysis_id, - aa1.analysis_name; - ---ruleid 24 WARNING: days_supply > 180 -INSERT INTO @results_database_schema.ACHILLES_HEEL_results ( - analysis_id, - ACHILLES_HEEL_warning, - rule_id, - record_count - ) -SELECT ord1.analysis_id, - CAST(CONCAT('WARNING: ', cast(ord1.analysis_id as VARCHAR(10)), '-', oa1.analysis_name, ' (count = ', cast(COUNT_BIG(ord1.max_value) as VARCHAR(19)), '); max value should not be > 180') AS VARCHAR(255)) AS ACHILLES_HEEL_warning, - 24 as rule_id, - COUNT_BIG(ord1.max_value) as record_count -FROM @results_database_schema.ACHILLES_results_dist ord1 -INNER JOIN @results_database_schema.ACHILLES_analysis oa1 - ON ord1.analysis_id = oa1.analysis_id -WHERE ord1.analysis_id IN (715) - AND ord1.max_value > 180 -GROUP BY ord1.analysis_id, oa1.analysis_name; - ---ruleid 25 WARNING: refills > 10 -INSERT INTO @results_database_schema.ACHILLES_HEEL_results ( - analysis_id, - ACHILLES_HEEL_warning, - rule_id, - record_count - ) -SELECT ord1.analysis_id, - CAST(CONCAT('WARNING: ', cast(ord1.analysis_id as VARCHAR(10)), '-', oa1.analysis_name, ' (count = ', cast(COUNT_BIG(ord1.max_value) 
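-- Clarifying note on the month arithmetic in rules 22 and 23: stratum_1 of the monthly
-- analyses (420, 620, 720, 820, 920, 1020) is a calendar month encoded as a YYYYMM integer,
-- so the following month is +1 within a year and +89 across a year boundary
-- (e.g. 201712 + 89 = 201801), which is why both offsets are tested. An illustrative,
-- read-only sketch pairing consecutive months for analysis 420:
SELECT ar1.stratum_1 AS month_1,
  ar2.stratum_1 AS month_2,
  1.0 * ar2.count_value / ar1.count_value AS month_over_month_ratio
FROM @results_database_schema.ACHILLES_results ar1
INNER JOIN @results_database_schema.ACHILLES_results ar2
  ON ar1.analysis_id = ar2.analysis_id
  AND (CAST(ar1.stratum_1 AS INT) + 1 = CAST(ar2.stratum_1 AS INT)
    OR CAST(ar1.stratum_1 AS INT) + 89 = CAST(ar2.stratum_1 AS INT))
WHERE ar1.analysis_id = 420
  AND ar1.count_value > 0;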
as VARCHAR(19)), '); max value should not be > 10') AS VARCHAR(255)) AS ACHILLES_HEEL_warning, - 25 as rule_id, - COUNT_BIG(ord1.max_value) as record_count -FROM @results_database_schema.ACHILLES_results_dist ord1 -INNER JOIN @results_database_schema.ACHILLES_analysis oa1 - ON ord1.analysis_id = oa1.analysis_id -WHERE ord1.analysis_id IN (716) - AND ord1.max_value > 10 -GROUP BY ord1.analysis_id, oa1.analysis_name; - ---ruleid 26 DQ rule: WARNING: quantity > 600 -INSERT INTO @results_database_schema.ACHILLES_HEEL_results ( - analysis_id, - ACHILLES_HEEL_warning, - rule_id, - record_count - ) -SELECT ord1.analysis_id, - CAST(CONCAT('WARNING: ', cast(ord1.analysis_id as VARCHAR(10)), '-', oa1.analysis_name, ' (count = ', cast(count(ord1.max_value) as VARCHAR(19)), '); max value should not be > 600') AS VARCHAR(255)) AS ACHILLES_HEEL_warning, - 26 as rule_id, - count(ord1.max_value) as record_count -FROM @results_database_schema.ACHILLES_results_dist ord1 -INNER JOIN @results_database_schema.ACHILLES_analysis oa1 - ON ord1.analysis_id = oa1.analysis_id -WHERE ord1.analysis_id IN (717) - AND ord1.max_value > 600 -GROUP BY ord1.analysis_id, oa1.analysis_name; - - - ---rules may require first a derived measure and the subsequent data quality ---check is simpler to implement ---also results are accessible even if the rule did not generate a warning - ---rule27 ---due to most likely missint sql cast errors it was removed from this release ---will be included after more testing ---being fixed in this update - ---compute derived measure first -insert into @results_database_schema.ACHILLES_results_derived (statistic_value,stratum_1,measure_id) -select - CAST(100.0*st.val/statistic_value AS FLOAT) as statistic_value, - CAST('Condition' AS VARCHAR(255)) as stratum_1, CAST( 'UnmappedData:byDomain:Percentage' AS VARCHAR(255)) as measure_id -from @results_database_schema.achilles_results_derived -cross join (select statistic_value as val from @results_database_schema.achilles_results_derived where measure_id like 'UnmappedData:ach_401:GlobalRowCnt') as st -where measure_id ='ach_401:GlobalRowCnt'; - -insert into @results_database_schema.ACHILLES_results_derived (statistic_value,stratum_1,measure_id) -select - CAST(100.0*st.val/statistic_value AS FLOAT) as statistic_value, - CAST('Procedure' AS VARCHAR(255)) as stratum_1, CAST( 'UnmappedData:byDomain:Percentage' AS VARCHAR(255)) as measure_id -from @results_database_schema.achilles_results_derived -cross join (select statistic_value as val from @results_database_schema.achilles_results_derived where measure_id = 'UnmappedData:ach_601:GlobalRowCnt') as st -where measure_id ='ach_601:GlobalRowCnt'; - -insert into @results_database_schema.ACHILLES_results_derived (statistic_value,stratum_1,measure_id) -select - CAST(100.0*st.val/statistic_value AS FLOAT) as statistic_value, - CAST('DrugExposure' AS VARCHAR(255)) as stratum_1, CAST( 'UnmappedData:byDomain:Percentage' AS VARCHAR(255)) as measure_id -from @results_database_schema.achilles_results_derived -cross join (select statistic_value as val from @results_database_schema.achilles_results_derived where measure_id = 'UnmappedData:ach_701:GlobalRowCnt') as st -where measure_id ='ach_701:GlobalRowCnt'; - -insert into @results_database_schema.ACHILLES_results_derived (statistic_value,stratum_1,measure_id) -select - CAST(100.0*st.val/statistic_value AS FLOAT) as statistic_value, - CAST('Observation' AS VARCHAR(255)) as stratum_1, CAST( 'UnmappedData:byDomain:Percentage' AS VARCHAR(255)) as measure_id -from 
@results_database_schema.achilles_results_derived -cross join (select statistic_value as val from @results_database_schema.achilles_results_derived where measure_id = 'UnmappedData:ach_801:GlobalRowCnt') as st -where measure_id ='ach_801:GlobalRowCnt'; - -insert into @results_database_schema.ACHILLES_results_derived (statistic_value,stratum_1,measure_id) -select - CAST(100.0*st.val/statistic_value AS FLOAT) as statistic_value, - CAST('Measurement' AS VARCHAR(255)) as stratum_1, CAST( 'UnmappedData:byDomain:Percentage' AS VARCHAR(255)) as measure_id -from @results_database_schema.achilles_results_derived -cross join (select statistic_value as val from @results_database_schema.achilles_results_derived where measure_id = 'UnmappedData:ach_1801:GlobalRowCnt') as st -where measure_id ='ach_1801:GlobalRowCnt'; - - ---actual rule27 - - INSERT INTO @results_database_schema.ACHILLES_HEEL_results (ACHILLES_HEEL_warning,rule_id) - SELECT - CAST(CONCAT('NOTIFICATION:Unmapped data over percentage threshold in:', cast(d.stratum_1 as varchar(100))) AS VARCHAR(255)) as ACHILLES_HEEL_warning, - 27 as rule_id - FROM @results_database_schema.ACHILLES_results_derived d - where d.measure_id = 'UnmappedData:byDomain:Percentage' - and d.statistic_value > 0.1 --thresholds will be decided in the ongoing DQ-Study2 - ; - ---end of rule27 - ---rule28 DQ rule ---are all values (or more than threshold) in measurement table non numerical? ---(count of Measurment records with no numerical value is in analysis_id 1821) - - - -with t1 (all_count) as - (select sum(count_value) as all_count from @results_database_schema.achilles_results where analysis_id = 1820) -select - CAST(ct.count_value*CAST(100.0 AS FLOAT)/all_count AS FLOAT) as statistic_value, - CAST('Meas:NoNumValue:Percentage' AS VARCHAR(100)) as measure_id -into #tempResults -from t1 -cross join (select CAST(count_value AS FLOAT) as count_value from @results_database_schema.achilles_results where analysis_id = 1821) as ct -; - - -insert into @results_database_schema.ACHILLES_results_derived (statistic_value, measure_id) - select statistic_value, measure_id from #tempResults; - - - -INSERT INTO @results_database_schema.ACHILLES_HEEL_results (ACHILLES_HEEL_warning,rule_id,record_count) -SELECT - CAST('NOTIFICATION: percentage of non-numerical measurement records exceeds general population threshold ' AS VARCHAR(255)) as ACHILLES_HEEL_warning, - 28 as rule_id, - cast(statistic_value as int) as record_count -FROM #tempResults t ---WHERE t.analysis_id IN (100730,100430) --umbrella version -WHERE measure_id='Meas:NoNumValue:Percentage' --t.analysis_id IN (100000) ---the intended threshold is 1 percent, this value is there to get pilot data from early adopters - AND t.statistic_value >= 80 -; - - ---clean up temp tables for rule 28 -truncate table #tempResults; -drop table #tempResults; - ---end of rule 28 - ---rule29 DQ rule ---unusual diagnosis present, this rule is terminology dependend - -with tempcnt as( - select sum(count_value) as pt_cnt from @results_database_schema.achilles_results - where analysis_id = 404 --dx by decile - and stratum_1 = '195075' --meconium - --and stratum_3 = '8507' --possible limit to males only - and cast(stratum_4 as int) >= 5 --fifth decile or more -) -select pt_cnt as record_count -into #tempResults ---set threshold here, currently it is zero -from tempcnt where pt_cnt > 0; - - ---using temp table because with clause that occurs prior insert into is causing problems ---and with clause makes the code more readable -INSERT INTO 
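-- Illustrative sketch, for orientation only: rule 28 above computes
-- 100 * (analysis 1821 count) / (sum of analysis 1820 counts) through a temp table before
-- raising the notification. The same percentage can be previewed with one read-only query:
SELECT 100.0 * ct.count_value / t1.all_count AS pct_meas_records_without_numeric_value
FROM (
  SELECT SUM(count_value) AS all_count
  FROM @results_database_schema.achilles_results
  WHERE analysis_id = 1820
) t1
CROSS JOIN (
  SELECT CAST(count_value AS FLOAT) AS count_value
  FROM @results_database_schema.achilles_results
  WHERE analysis_id = 1821
) ct;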
@results_database_schema.ACHILLES_HEEL_results (ACHILLES_HEEL_warning,rule_id,record_count) -SELECT - CAST('WARNING:[PLAUSIBILITY] infant-age diagnosis (195075) at age 50+' AS VARCHAR(255)) as ACHILLES_HEEL_warning, - 29 as rule_id, - record_count -FROM #tempResults t; - -truncate table #tempResults; -drop table #tempResults; ---end of rule29 - - ---rule30 CDM-conformance rule: is CDM metadata table created at all? - --create a derived measure for rule30 - --done strangly to possibly avoid from dual error on Oracle - --done as not null just in case sqlRender has NOT NULL hard coded - --check if table exist and if yes - derive 1 for a derived measure - - --does not work on redshift :-( --commenting it out ---IF OBJECT_ID('@cdm_database_schema.CDM_SOURCE', 'U') IS NOT NULL ---insert into @results_database_schema.ACHILLES_results_derived (statistic_value,measure_id) --- select distinct analysis_id as statistic_value, --- 'MetaData:TblExists' as measure_id --- from @results_database_schema.ACHILLES_results --- where analysis_id = 1; - - --actual rule30 - ---end of rule30 - - ---rule31 DQ rule ---ratio of providers to total patients - ---compute a derived reatio ---TODO if provider count is zero it will generate division by zero (not sure how dirrerent db engins will react) -insert into @results_database_schema.ACHILLES_results_derived (statistic_value,measure_id) - select CAST(1.0*ct.total_pts/count_value AS FLOAT) as statistic_value, CAST('Provider:PatientProviderRatio' AS VARCHAR(255)) as measure_id - from @results_database_schema.achilles_results - cross join (select count_value as total_pts from @results_database_schema.achilles_results r where analysis_id =1) ct - where analysis_id = 300 -; - ---actual rule -INSERT INTO @results_database_schema.ACHILLES_HEEL_results (ACHILLES_HEEL_warning,rule_id) -SELECT - CAST('NOTIFICATION:[PLAUSIBILITY] database has too few providers defined (given the total patient number)' AS VARCHAR(255)) as ACHILLES_HEEL_warning, - 31 as rule_id -FROM @results_database_schema.ACHILLES_results_derived d -where d.measure_id = 'Provider:PatientProviderRatio' -and d.statistic_value > 10000 --thresholds will be decided in the ongoing DQ-Study2 -; - ---rule32 DQ rule ---uses iris: patients with at least one visit visit ---does 100-THE IRIS MEASURE to check for percentage of patients with no visits - -INSERT INTO @results_database_schema.ACHILLES_HEEL_results (ACHILLES_HEEL_warning,rule_id) -SELECT - CAST('NOTIFICATION: Percentage of patients with no visits exceeds threshold' AS VARCHAR(255)) as ACHILLES_HEEL_warning, - 32 as rule_id -FROM @results_database_schema.ACHILLES_results_derived d -where d.measure_id = 'ach_2003:Percentage' -and 100-d.statistic_value > 27 --threshold identified in the DataQuality study -; - ---rule33 DQ rule (for general population only) ---NOTIFICATION: database does not have all age 0-80 represented - - -INSERT INTO @results_database_schema.ACHILLES_HEEL_results (ACHILLES_HEEL_warning,rule_id) -SELECT - CAST('NOTIFICATION: [GeneralPopulationOnly] Not all deciles represented at first observation' AS VARCHAR(255)) as ACHILLES_HEEL_warning, - 33 as rule_id -FROM @results_database_schema.ACHILLES_results_derived d -where d.measure_id = 'AgeAtFirstObsByDecile:DecileCnt' -and d.statistic_value <9 --we expect deciles 0,1,2,3,4,5,6,7,8 -; - - ---rule34 DQ rule ---NOTIFICATION: number of unmapped source values exceeds threshold ---related to rule 27 that looks at percentage of unmapped rows (rows as focus) ---this rule is looking at source values (as 
focus) - - -INSERT INTO @results_database_schema.ACHILLES_HEEL_results (ACHILLES_HEEL_warning,rule_id,record_count) -SELECT - CAST(CONCAT('NOTIFICATION: Count of unmapped source values exceeds threshold in: ', cast(stratum_1 as varchar(100))) AS VARCHAR(255)) as ACHILLES_HEEL_warning, - 34 as rule_id, - cast(statistic_value as int) as record_count -FROM @results_database_schema.ACHILLES_results_derived d -where measure_id = 'UnmappedDataByDomain:SourceValueCnt' -and statistic_value > 1000; --threshold will be decided in DQ study 2 - - - ---rule35 DQ rule, NOTIFICATION ---this rule analyzes Units recorded for measurement - -INSERT INTO @results_database_schema.ACHILLES_HEEL_results (ACHILLES_HEEL_warning,rule_id,record_count) - SELECT - CAST('NOTIFICATION: Count of measurement_ids with more than 5 distinct units exceeds threshold' AS VARCHAR(255)) as ACHILLES_HEEL_warning, - 35 as rule_id, - cast(meas_concept_id_cnt as int) as record_count - from ( - select meas_concept_id_cnt from (select sum(freq) as meas_concept_id_cnt from - (select u_cnt, count(*) as freq from - (select stratum_1, count(*) as u_cnt - from @results_database_schema.achilles_results where analysis_id = 1807 group by stratum_1) a - group by u_cnt - ) b - where u_cnt >= 5 --threshold one for the rule - ) c - where meas_concept_id_cnt >= 10 --threshold two for the rule - ) d -; - - - ---ruleid 36 WARNING: age > 125 (related to an error grade rule 21 that has higher threshold) -INSERT INTO @results_database_schema.ACHILLES_HEEL_results ( - analysis_id, - ACHILLES_HEEL_warning, - rule_id, - record_count - ) -SELECT or1.analysis_id, - CAST(CONCAT('WARNING: ', cast(or1.analysis_id as VARCHAR(10)), '-', oa1.analysis_name, '; should not have age > @ThresholdAgeWarning, (n=', cast(sum(or1.count_value) as VARCHAR(19)), ')') AS VARCHAR(255)) AS ACHILLES_HEEL_warning, - 36 as rule_id, - sum(or1.count_value) as record_count -FROM @results_database_schema.ACHILLES_results or1 -INNER JOIN @results_database_schema.ACHILLES_analysis oa1 - ON or1.analysis_id = oa1.analysis_id -WHERE or1.analysis_id IN (101) - AND CAST(or1.stratum_1 AS INT) > @ThresholdAgeWarning - AND or1.count_value > 0 -GROUP BY or1.analysis_id, - oa1.analysis_name; - ---ruleid 37 DQ rule - ---derived measure for this rule - ratio of notes over the number of visits -insert into @results_database_schema.ACHILLES_results_derived (statistic_value,measure_id) -SELECT CAST(1.0*c1.all_notes/1.0*c2.all_visits AS FLOAT) as statistic_value, CAST( 'Note:NoteVisitRatio' AS VARCHAR(255)) as measure_id -FROM (SELECT sum(count_value) as all_notes FROM @results_database_schema.achilles_results r WHERE analysis_id =2201 ) c1 -CROSS JOIN (SELECT sum(count_value) as all_visits FROM @results_database_schema.achilles_results r WHERE analysis_id =201 ) c2; - ---one co-author of the DataQuality study suggested measuring data density on visit level (in addition to --- patient and dataset level) ---Assumption is that at least one data event (e.g., diagnisis, note) is generated for each visit ---this rule is testing that at least some notes exist (considering the number of visits) ---for datasets with zero notes the derived measure is null and rule does not fire at all ---possible elaboration of this rule include number of inpatient notes given number of inpatient visits ---current rule is on overall data density (for notes only) per visit level - -INSERT INTO @results_database_schema.ACHILLES_HEEL_results (ACHILLES_HEEL_warning,rule_id,record_count) -SELECT - CAST('NOTIFICATION: Notes data 
density is below threshold' AS VARCHAR(255)) as ACHILLES_HEEL_warning, - 37 as rule_id, - cast(statistic_value as int) as record_count -FROM @results_database_schema.ACHILLES_results_derived d -where measure_id = 'Note:NoteVisitRatio' -and statistic_value < 0.01; --threshold will be decided in DataQuality study - - - - ---ruleid 38 DQ rule; in a general dataset, it is expected that more than providers with a wide range of specialties ---(at least more than just one specialty) is present ---notification may indicate that provider table is missing data on specialty ---typical dataset has at least 28 specialties present in provider table - -INSERT INTO @results_database_schema.ACHILLES_HEEL_results (ACHILLES_HEEL_warning,rule_id,record_count) -SELECT - CAST('NOTIFICATION: [GeneralPopulationOnly] Count of distinct specialties of providers in the PROVIDER table is below threshold' AS VARCHAR(255)) as ACHILLES_HEEL_warning, - 38 as rule_id, - cast(statistic_value as int) as record_count -FROM @results_database_schema.ACHILLES_results_derived d -where measure_id = 'Provider:SpeciatlyCnt' -and statistic_value <2; --DataQuality data indicate median of 55 specialties (percentile25 is 28; percentile10 is 2) - - ---ruleid 39 DQ rule; Given lifetime record DQ assumption if more than 30k patients is born for every deceased patient ---the dataset may not be recording complete records for all senior patients in that year ---derived ratio measure Death:BornDeceasedRatio only exists for years where death data exist ---to avoid alerting on too early years such as 1925 where births exist but no deaths - -INSERT INTO @results_database_schema.ACHILLES_HEEL_results (ACHILLES_HEEL_warning,rule_id,record_count) -select - CAST('NOTIFICATION: [GeneralPopulationOnly] In some years, number of deaths is too low considering the number of births (lifetime record DQ assumption)' AS VARCHAR(255)) - as achilles_heel_warning, - 39 as rule_id, - year_cnt as record_count - from - (select count(*) as year_cnt from @results_database_schema.achilles_results_derived - where measure_id = 'Death:BornDeceasedRatio' and statistic_value > 30000) a -where a.year_cnt> 0; - - ---ruleid 40 this rule was under umbrella rule 1 and was made into a separate rule - - -INSERT INTO @results_database_schema.ACHILLES_HEEL_results ( - analysis_id, - ACHILLES_HEEL_warning, - rule_id, - record_count - ) -SELECT DISTINCT or1.analysis_id, - CAST(CONCAT('ERROR: Death event outside observation period, ', cast(or1.analysis_id as VARCHAR(10)), '-', oa1.analysis_name, '; count (n=', cast(or1.count_value as VARCHAR(19)), ') should not be > 0') AS VARCHAR(255)) AS ACHILLES_HEEL_warning, - 40 as rule_id, - or1.count_value -FROM @results_database_schema.ACHILLES_results or1 -INNER JOIN @results_database_schema.ACHILLES_analysis oa1 - ON or1.analysis_id = oa1.analysis_id -WHERE or1.analysis_id IN (510) - AND or1.count_value > 0; - - ---ruleid 41 DQ rule, data density ---porting a Sentinel rule that checks for certain vital signs data (weight, in this case) ---multiple concepts_ids may be added to broaden the rule, however standardizing on a single ---concept would be more optimal - -INSERT INTO @results_database_schema.ACHILLES_HEEL_results (ACHILLES_HEEL_warning,rule_id) -select CAST('NOTIFICATION:No body weight data in MEASUREMENT table (under concept_id 3025315 (LOINC code 29463-7))' AS VARCHAR(255)) - as achilles_heel_warning, - 41 as rule_id -from -(select count(*) as row_present - from @results_database_schema.achilles_results - where analysis_id = 1800 and 
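-- Illustrative sketch, for orientation only: as the comment above suggests, rule 41 could be
-- broadened from the single body-weight concept to a set of vital-sign concepts. The extra
-- concept_ids are deliberately left as a placeholder here rather than guessed:
SELECT a.row_present
FROM (
  SELECT COUNT(*) AS row_present
  FROM @results_database_schema.achilles_results
  WHERE analysis_id = 1800
    AND stratum_1 IN ('3025315' /* , '<additional vital-sign concept_ids here>' */)
) a
WHERE a.row_present = 0;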
stratum_1 = '3025315' -) a -where a.row_present = 0; - - - ---ruleid 42 DQ rule ---Percentage of outpatient visits (concept_id 9202) is too low (for general population). ---This may indicate a dataset with mostly inpatient data (that may be biased and missing some EHR events) ---Threshold was decided as 10th percentile in empiric comparison of 12 real world datasets in the DQ-Study2 - - - -INSERT INTO @results_database_schema.ACHILLES_HEEL_results (ACHILLES_HEEL_warning,rule_id) -select CAST('NOTIFICATION: [GeneralPopulationOnly] Percentage of outpatient visits is below threshold' AS VARCHAR(255)) - as achilles_heel_warning, - 42 as rule_id -from - ( - select - 1.0*achilles_results.count_value/c1.count_value as outp_perc - from @results_database_schema.achilles_results - cross join (select sum(count_value) as count_value from @results_database_schema.achilles_results where analysis_id = 201) c1 - where analysis_id = 201 and stratum_1='9202' - ) d -where d.outp_perc < @ThresholdOutpatientVisitPerc; - ---ruleid 43 DQ rule ---looks at observation period data, if all patients have exactly one the rule alerts the user ---This rule is based on majority of real life datasets. ---For some datasets (e.g., UK national data with single payor, one observation period is perfectly valid) - - -INSERT INTO @results_database_schema.ACHILLES_HEEL_results (ACHILLES_HEEL_warning,rule_id) -select CAST('NOTIFICATION: 99+ percent of persons have exactly one observation period' AS VARCHAR(255)) - as achilles_heel_warning, - 43 as rule_id -from - (select 100.0*achilles_results.count_value/ct.total_pts as one_obs_per_perc - from @results_database_schema.achilles_results - cross join (select count_value as total_pts from @results_database_schema.achilles_results r where analysis_id =1) as ct - where analysis_id = 113 and stratum_1 = '1' - ) d -where d.one_obs_per_perc >= 99.0; - - - ---ruleid 44 DQ rule ---uses iris measure: patients with at least 1 Meas, 1 Dx and 1 Rx - - -INSERT INTO @results_database_schema.ACHILLES_HEEL_results (ACHILLES_HEEL_warning,rule_id) -SELECT - CAST('NOTIFICATION: Percentage of patients with at least 1 Measurement, 1 Dx and 1 Rx is below threshold' AS VARCHAR(255)) as ACHILLES_HEEL_warning, - 44 as rule_id -FROM @results_database_schema.ACHILLES_results_derived d -where d.measure_id = 'ach_2002:Percentage' -and d.statistic_value < @ThresholdMinimalPtMeasDxRx --threshold identified in the DataQuality study -; - - - diff --git a/inst/sql/sql_server/Achilles_v4.sql b/inst/sql/sql_server/Achilles_v4.sql deleted file mode 100644 index b3c6b415..00000000 --- a/inst/sql/sql_server/Achilles_v4.sql +++ /dev/null @@ -1,4882 +0,0 @@ -/****************************************************************** - -# @file ACHILLES_v4.SQL -# -# Copyright 2014 Observational Health Data Sciences and Informatics -# -# This file is part of ACHILLES -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# -# @author Observational Health Data Sciences and Informatics - - - - -*******************************************************************/ - - -/******************************************************************* - -Achilles - database profiling summary statistics generation - -SQL for OMOP CDM v4 - - -*******************************************************************/ - -{DEFAULT @cdm_database = 'CDM'} -{DEFAULT @results_database = 'scratch'} -{DEFAULT @results_database_schema = 'scratch.dbo'} -{DEFAULT @source_name = 'CDM NAME'} -{DEFAULT @smallcellcount = 5} -{DEFAULT @createTable = TRUE} -{DEFAULT @validateSchema = FALSE} - - /**** - developer comment about general ACHILLES calculation process: - you could drive # of persons by age decile, from # of persons by age decile by gender - as a general rule: do full stratification once, and then aggregate across strata to avoid re-calculation - works for all prevalence calculations...does not work for any distribution statistics - *****/ - ---{@validateSchema}?{ - --- RSD - 2014-10-27 --- Execute a series of quick select statements to verify that the CDM schema --- has all the proper tables and columns --- The point is to catch any missing tables/columns here before we spend hours --- generating results before bombing out - -create table #TableCheck -( - tablename varchar(50) -) -; - -insert into #TableCheck (tablename) -select 'care_site' -from ( -SELECT - care_site_id, - location_id, - organization_id, - place_of_service_concept_id, - care_site_source_value, - place_of_service_source_value, - row_number() over (order by care_site_id) rn -FROM - @cdm_database_schema.care_site -) CARE_SITE -WHERE rn = 1; - - -insert into #TableCheck (tablename) -select 'cohort' -from ( -SELECT - cohort_id, - cohort_concept_id, - cohort_start_date, - cohort_end_date, - subject_id, - stop_reason, - row_number() over (order by cohort_concept_id) rn - -FROM - @cdm_database_schema.cohort -) COHORT -WHERE rn = 1; - -insert into #TableCheck (tablename) -select 'condition_era' -from ( -SELECT - condition_era_id, - person_id, - condition_concept_id, - condition_era_start_date, - condition_era_end_date, - condition_type_concept_id, - condition_occurrence_count, - row_number() over (order by person_id) rn -FROM - @cdm_database_schema.condition_era -) CONDITION_ERA -WHERE rn = 1; - -insert into #TableCheck (tablename) -select 'condition_occurrence' -from ( -SELECT - condition_occurrence_id, - person_id, - condition_concept_id, - condition_start_date, - condition_end_date, - condition_type_concept_id, - stop_reason, - associated_provider_id, - visit_occurrence_id, - condition_source_value, - row_number() over (order by person_id) rn -FROM - @cdm_database_schema.condition_occurrence -) condition_occurrence -WHERE rn = 1; - -insert into #TableCheck (tablename) -select 'death' -from ( -SELECT - person_id, - death_date, - death_type_concept_id, - cause_of_death_concept_id, - cause_of_death_source_value, - row_number() over (order by person_id) rn -FROM - @cdm_database_schema.death -) death -WHERE rn = 1; - -insert into #TableCheck (tablename) -select 'drug_cost' -from ( -SELECT - drug_cost_id, - drug_exposure_id, - paid_copay, - paid_coinsurance, - paid_toward_deductible, - paid_by_payer, - paid_by_coordination_benefits, - total_out_of_pocket, - total_paid, - ingredient_cost, - dispensing_fee, - average_wholesale_price, - payer_plan_period_id, - row_number() over (order by drug_cost_id) rn -FROM - @cdm_database_schema.drug_cost -) drug_cost -WHERE rn = 1; - -insert into 
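-- Illustrative note on the pattern used throughout this deleted validation step: each block
-- selects every column the script depends on behind a row_number() window and keeps only
-- rn = 1, so a missing table or column fails immediately and cheaply rather than hours into
-- the analysis SQL. The general shape, with purely hypothetical table and column names:
insert into #TableCheck (tablename)
select 'some_cdm_table' -- hypothetical table name, for illustration only
from (
SELECT
  expected_column_1, -- list every column the analyses rely on
  expected_column_2,
  row_number() over (order by expected_column_1) rn
FROM
  @cdm_database_schema.some_cdm_table -- hypothetical
) some_cdm_table
WHERE rn = 1;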
#TableCheck (tablename) -select 'drug_era' -from ( -SELECT - drug_era_id, - person_id, - drug_concept_id, - drug_era_start_date, - drug_era_end_date, - drug_type_concept_id, - drug_exposure_count, - row_number() over (order by person_id) rn -FROM - @cdm_database_schema.drug_era -) drug_era -WHERE rn = 1; - -insert into #TableCheck (tablename) -select 'drug_exposure' -from ( -SELECT - drug_exposure_id, - person_id, - drug_concept_id, - drug_exposure_start_date, - drug_exposure_end_date, - drug_type_concept_id, - stop_reason, - refills, - quantity, - days_supply, - sig, - prescribing_provider_id, - visit_occurrence_id, - relevant_condition_concept_id, - drug_source_value, - row_number() over (order by person_id) rn -FROM - @cdm_database_schema.drug_exposure -) drug_exposure -WHERE rn = 1; - -insert into #TableCheck (tablename) -select 'location' -from ( -SELECT - location_id, - address_1, - address_2, - city, - STATE, - zip, - county, - location_source_value, - row_number() over (order by location_id) rn -FROM - @cdm_database_schema.location -) location -WHERE rn = 1; - -insert into #TableCheck (tablename) -select 'observation' -from ( -SELECT - observation_id, - person_id, - observation_concept_id, - observation_date, - observation_time, - value_as_number, - value_as_string, - value_as_concept_id, - unit_concept_id, - range_low, - range_high, - observation_type_concept_id, - associated_provider_id, - visit_occurrence_id, - relevant_condition_concept_id, - observation_source_value, - unit_source_value, - row_number() over (order by person_id) rn -FROM - @cdm_database_schema.observation -) location -WHERE rn = 1; - -insert into #TableCheck (tablename) -select 'observation_period' -from ( -SELECT - observation_period_id, - person_id, - observation_period_start_date, - observation_period_end_date, - row_number() over (order by person_id) rn -FROM - @cdm_database_schema.observation_period -) observation_period -WHERE rn = 1; - -insert into #TableCheck (tablename) -select 'organization' -from ( -SELECT - organization_id, - place_of_service_concept_id, - location_id, - organization_source_value, - place_of_service_source_value, - row_number() over (order by organization_id) rn -FROM - @cdm_database_schema.organization -) organization -WHERE rn = 1; - -insert into #TableCheck (tablename) -select 'payer_plan_period' -from ( -SELECT - payer_plan_period_id, - person_id, - payer_plan_period_start_date, - payer_plan_period_end_date, - payer_source_value, - plan_source_value, - family_source_value, - row_number() over (order by person_id) rn -FROM - @cdm_database_schema.payer_plan_period -) payer_plan_period -WHERE rn = 1; - -insert into #TableCheck (tablename) -select 'person' -from ( -SELECT - person_id, - gender_concept_id, - year_of_birth, - month_of_birth, - day_of_birth, - race_concept_id, - ethnicity_concept_id, - location_id, - provider_id, - care_site_id, - person_source_value, - gender_source_value, - race_source_value, - ethnicity_source_value, - row_number() over (order by person_id) rn -FROM - @cdm_database_schema.person -) person -WHERE rn = 1; - -insert into #TableCheck (tablename) -select 'procedure_cost' -from ( -SELECT - procedure_cost_id, - procedure_occurrence_id, - paid_copay, - paid_coinsurance, - paid_toward_deductible, - paid_by_payer, - paid_by_coordination_benefits, - total_out_of_pocket, - total_paid, - disease_class_concept_id, - revenue_code_concept_id, - payer_plan_period_id, - disease_class_source_value, - revenue_code_source_value, - row_number() over (order by 
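-- Illustrative sketch, for orientation only, of the developer comment near the top of this
-- file: prevalence-style counts can be derived by aggregating a finer stratification instead
-- of re-querying the CDM. For example, summing analysis 102 (persons by gender by age at
-- first observation) over its gender stratum should closely reproduce the analysis 101
-- counts (persons by age at first observation):
SELECT r.stratum_2 AS age_at_first_observation,
  SUM(r.count_value) AS person_count
FROM @results_database_schema.ACHILLES_results r
WHERE r.analysis_id = 102
GROUP BY r.stratum_2;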
procedure_cost_id) rn -FROM - @cdm_database_schema.procedure_cost -) procedure_cost -WHERE rn = 1; - -insert into #TableCheck (tablename) -select 'procedure_occurrence' -from ( -SELECT - procedure_occurrence_id, - person_id, - procedure_concept_id, - procedure_date, - procedure_type_concept_id, - associated_provider_id, - visit_occurrence_id, - relevant_condition_concept_id, - procedure_source_value, - row_number() over (order by person_id) rn -FROM - @cdm_database_schema.procedure_occurrence -) procedure_occurrence -WHERE rn = 1; - -insert into #TableCheck (tablename) -select 'provider' -from ( -SELECT - provider_id, - NPI, - DEA, - specialty_concept_id, - care_site_id, - provider_source_value, - specialty_source_value, - row_number() over (order by provider_id) rn -FROM - @cdm_database_schema.provider -) provider -WHERE rn = 1; - -insert into #TableCheck (tablename) -select 'visit_occurrence' -from ( -SELECT - visit_occurrence_id, - person_id, - visit_start_date, - visit_end_date, - place_of_service_concept_id, - care_site_id, - place_of_service_source_value, - row_number() over (order by person_id) rn -FROM - @cdm_database_schema.visit_occurrence -) visit_occurrence -WHERE rn = 1; - -TRUNCATE TABLE #TableCheck; -DROP TABLE #TableCheck; - ---} - ---{@createTable}?{ - -IF OBJECT_ID('@results_database_schema.ACHILLES_analysis', 'U') IS NOT NULL - drop table @results_database_schema.ACHILLES_analysis; - -create table @results_database_schema.ACHILLES_analysis -( - analysis_id int, - analysis_name varchar(255), - stratum_1_name varchar(255), - stratum_2_name varchar(255), - stratum_3_name varchar(255), - stratum_4_name varchar(255), - stratum_5_name varchar(255) -); - - -IF OBJECT_ID('@results_database_schema.ACHILLES_results', 'U') IS NOT NULL - drop table @results_database_schema.ACHILLES_results; - -create table @results_database_schema.ACHILLES_results -( - analysis_id int, - stratum_1 varchar(255), - stratum_2 varchar(255), - stratum_3 varchar(255), - stratum_4 varchar(255), - stratum_5 varchar(255), - count_value bigint -); - - -IF OBJECT_ID('@results_database_schema.ACHILLES_results_dist', 'U') IS NOT NULL - drop table @results_database_schema.ACHILLES_results_dist; - -create table @results_database_schema.ACHILLES_results_dist -( - analysis_id int, - stratum_1 varchar(255), - stratum_2 varchar(255), - stratum_3 varchar(255), - stratum_4 varchar(255), - stratum_5 varchar(255), - count_value bigint, - min_value float, - max_value float, - avg_value float, - stdev_value float, - median_value float, - p10_value float, - p25_value float, - p75_value float, - p90_value float -); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (0, 'Source name'); - ---000. 
PERSON statistics - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (1, 'Number of persons'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (2, 'Number of persons by gender', 'gender_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (3, 'Number of persons by year of birth', 'year_of_birth'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (4, 'Number of persons by race', 'race_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (5, 'Number of persons by ethnicity', 'ethnicity_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (7, 'Number of persons with invalid provider_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (8, 'Number of persons with invalid location_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (9, 'Number of persons with invalid care_site_id'); - - ---100. OBSERVATION_PERIOD (joined to PERSON) - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (101, 'Number of persons by age, with age at first observation period', 'age'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name, stratum_2_name) - values (102, 'Number of persons by gender by age, with age at first observation period', 'gender_concept_id', 'age'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (103, 'Distribution of age at first observation period'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (104, 'Distribution of age at first observation period by gender', 'gender_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (105, 'Length of observation (days) of first observation period'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (106, 'Length of observation (days) of first observation period by gender', 'gender_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (107, 'Length of observation (days) of first observation period by age decile', 'age decile'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (108, 'Number of persons by length of observation period, in 30d increments', 'Observation period length 30d increments'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (109, 'Number of persons with continuous observation in each year', 'calendar year'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (110, 'Number of persons with continuous observation in each month', 'calendar month'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (111, 'Number of persons by observation period start month', 'calendar month'); - -insert into 
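-- Illustrative sketch, for orientation only: the rows inserted here form the analysis
-- catalogue; joining it to ACHILLES_results labels each result row with its analysis name
-- and the meaning of its strata, e.g.:
SELECT a.analysis_id,
  a.analysis_name,
  a.stratum_1_name,
  r.stratum_1,
  r.count_value
FROM @results_database_schema.ACHILLES_analysis a
INNER JOIN @results_database_schema.ACHILLES_results r
  ON a.analysis_id = r.analysis_id
WHERE a.analysis_id = 2; -- persons by gender, as one example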
@results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (112, 'Number of persons by observation period end month', 'calendar month'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (113, 'Number of persons by number of observation periods', 'number of observation periods'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (114, 'Number of persons with observation period before year-of-birth'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (115, 'Number of persons with observation period end < observation period start'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name, stratum_2_name, stratum_3_name) - values (116, 'Number of persons with at least one day of observation in each year by gender and age decile', 'calendar year', 'gender_concept_id', 'age decile'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (117, 'Number of persons with at least one day of observation in each month', 'calendar month'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (118, 'Number of observation periods with invalid person_id'); - - - ---200- VISIT_OCCURRENCE - - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (200, 'Number of persons with at least one visit occurrence, by visit_concept_id', 'visit_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (201, 'Number of visit occurrence records, by visit_concept_id', 'visit_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name, stratum_2_name) - values (202, 'Number of persons by visit occurrence start month, by visit_concept_id', 'visit_concept_id', 'calendar month'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (203, 'Number of distinct visit occurrence concepts per person'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name, stratum_2_name, stratum_3_name, stratum_4_name) - values (204, 'Number of persons with at least one visit occurrence, by visit_concept_id by calendar year by gender by age decile', 'visit_concept_id', 'calendar year', 'gender_concept_id', 'age decile'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name, stratum_2_name) - values (206, 'Distribution of age by visit_concept_id', 'visit_concept_id', 'gender_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (207, 'Number of visit records with invalid person_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (208, 'Number of visit records outside valid observation period'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (209, 'Number of visit records with end date < start date'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (210, 'Number of visit records with invalid care_site_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, 
stratum_1_name) - values (211, 'Distribution of length of stay by visit_concept_id', 'visit_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (220, 'Number of visit occurrence records by visit occurrence start month', 'calendar month'); - - - ---300- PROVIDER -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (300, 'Number of providers'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (301, 'Number of providers by specialty concept_id', 'specialty_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (302, 'Number of providers with invalid care site id'); - - - ---400- CONDITION_OCCURRENCE - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (400, 'Number of persons with at least one condition occurrence, by condition_concept_id', 'condition_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (401, 'Number of condition occurrence records, by condition_concept_id', 'condition_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name, stratum_2_name) - values (402, 'Number of persons by condition occurrence start month, by condition_concept_id', 'condition_concept_id', 'calendar month'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (403, 'Number of distinct condition occurrence concepts per person'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name, stratum_2_name, stratum_3_name, stratum_4_name) - values (404, 'Number of persons with at least one condition occurrence, by condition_concept_id by calendar year by gender by age decile', 'condition_concept_id', 'calendar year', 'gender_concept_id', 'age decile'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name, stratum_2_name) - values (405, 'Number of condition occurrence records, by condition_concept_id by condition_type_concept_id', 'condition_concept_id', 'condition_type_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name, stratum_2_name) - values (406, 'Distribution of age by condition_concept_id', 'condition_concept_id', 'gender_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (409, 'Number of condition occurrence records with invalid person_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (410, 'Number of condition occurrence records outside valid observation period'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (411, 'Number of condition occurrence records with end date < start date'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (412, 'Number of condition occurrence records with invalid provider_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (413, 'Number of condition occurrence records with invalid visit_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (420, 
'Number of condition occurrence records by condition occurrence start month', 'calendar month'); - ---500- DEATH - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (500, 'Number of persons with death, by cause_of_death_concept_id', 'cause_of_death_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (501, 'Number of records of death, by cause_of_death_concept_id', 'cause_of_death_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (502, 'Number of persons by death month', 'calendar month'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name, stratum_2_name, stratum_3_name) - values (504, 'Number of persons with a death, by calendar year by gender by age decile', 'calendar year', 'gender_concept_id', 'age decile'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (505, 'Number of death records, by death_type_concept_id', 'death_type_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (506, 'Distribution of age at death by gender', 'gender_concept_id'); - - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (509, 'Number of death records with invalid person_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (510, 'Number of death records outside valid observation period'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (511, 'Distribution of time from death to last condition'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (512, 'Distribution of time from death to last drug'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (513, 'Distribution of time from death to last visit'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (514, 'Distribution of time from death to last procedure'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (515, 'Distribution of time from death to last observation'); - - ---600- PROCEDURE_OCCURRENCE - - - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (600, 'Number of persons with at least one procedure occurrence, by procedure_concept_id', 'procedure_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (601, 'Number of procedure occurrence records, by procedure_concept_id', 'procedure_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name, stratum_2_name) - values (602, 'Number of persons by procedure occurrence start month, by procedure_concept_id', 'procedure_concept_id', 'calendar month'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (603, 'Number of distinct procedure occurrence concepts per person'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name, stratum_2_name, stratum_3_name, stratum_4_name) - values (604, 'Number of persons with at least 
one procedure occurrence, by procedure_concept_id by calendar year by gender by age decile', 'procedure_concept_id', 'calendar year', 'gender_concept_id', 'age decile'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name, stratum_2_name) - values (605, 'Number of procedure occurrence records, by procedure_concept_id by procedure_type_concept_id', 'procedure_concept_id', 'procedure_type_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name, stratum_2_name) - values (606, 'Distribution of age by procedure_concept_id', 'procedure_concept_id', 'gender_concept_id'); - - - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (609, 'Number of procedure occurrence records with invalid person_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (610, 'Number of procedure occurrence records outside valid observation period'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (612, 'Number of procedure occurrence records with invalid provider_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (613, 'Number of procedure occurrence records with invalid visit_id'); - - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (620, 'Number of procedure occurrence records by procedure occurrence start month', 'calendar month'); - - ---700- DRUG_EXPOSURE - - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (700, 'Number of persons with at least one drug exposure, by drug_concept_id', 'drug_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (701, 'Number of drug exposure records, by drug_concept_id', 'drug_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name, stratum_2_name) - values (702, 'Number of persons by drug exposure start month, by drug_concept_id', 'drug_concept_id', 'calendar month'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (703, 'Number of distinct drug exposure concepts per person'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name, stratum_2_name, stratum_3_name, stratum_4_name) - values (704, 'Number of persons with at least one drug exposure, by drug_concept_id by calendar year by gender by age decile', 'drug_concept_id', 'calendar year', 'gender_concept_id', 'age decile'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name, stratum_2_name) - values (705, 'Number of drug exposure records, by drug_concept_id by drug_type_concept_id', 'drug_concept_id', 'drug_type_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name, stratum_2_name) - values (706, 'Distribution of age by drug_concept_id', 'drug_concept_id', 'gender_concept_id'); - - - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (709, 'Number of drug exposure records with invalid person_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (710, 'Number of drug exposure records outside 
valid observation period'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (711, 'Number of drug exposure records with end date < start date'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (712, 'Number of drug exposure records with invalid provider_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (713, 'Number of drug exposure records with invalid visit_id'); - - - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (715, 'Distribution of days_supply by drug_concept_id', 'drug_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (716, 'Distribution of refills by drug_concept_id', 'drug_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (717, 'Distribution of quantity by drug_concept_id', 'drug_concept_id'); - - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (720, 'Number of drug exposure records by drug exposure start month', 'calendar month'); - - ---800- OBSERVATION - - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (800, 'Number of persons with at least one observation occurrence, by observation_concept_id', 'observation_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (801, 'Number of observation occurrence records, by observation_concept_id', 'observation_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name, stratum_2_name) - values (802, 'Number of persons by observation occurrence start month, by observation_concept_id', 'observation_concept_id', 'calendar month'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (803, 'Number of distinct observation occurrence concepts per person'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name, stratum_2_name, stratum_3_name, stratum_4_name) - values (804, 'Number of persons with at least one observation occurrence, by observation_concept_id by calendar year by gender by age decile', 'observation_concept_id', 'calendar year', 'gender_concept_id', 'age decile'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name, stratum_2_name) - values (805, 'Number of observation occurrence records, by observation_concept_id by observation_type_concept_id', 'observation_concept_id', 'observation_type_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name, stratum_2_name) - values (806, 'Distribution of age by observation_concept_id', 'observation_concept_id', 'gender_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name, stratum_2_name) - values (807, 'Number of observation occurrence records, by observation_concept_id and unit_concept_id', 'observation_concept_id', 'unit_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (809, 'Number of observation records with invalid person_id'); - -insert into 
@results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (810, 'Number of observation records outside valid observation period'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (812, 'Number of observation records with invalid provider_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (813, 'Number of observation records with invalid visit_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (814, 'Number of observation records with no value (numeric, string, or concept)'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (815, 'Distribution of numeric values, by observation_concept_id and unit_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (816, 'Distribution of low range, by observation_concept_id and unit_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (817, 'Distribution of high range, by observation_concept_id and unit_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (818, 'Number of observation records below/within/above normal range, by observation_concept_id and unit_concept_id'); - - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (820, 'Number of observation records by observation start month', 'calendar month'); - - - ---900- DRUG_ERA - - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (900, 'Number of persons with at least one drug era, by drug_concept_id', 'drug_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (901, 'Number of drug era records, by drug_concept_id', 'drug_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name, stratum_2_name) - values (902, 'Number of persons by drug era start month, by drug_concept_id', 'drug_concept_id', 'calendar month'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (903, 'Number of distinct drug era concepts per person'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name, stratum_2_name, stratum_3_name, stratum_4_name) - values (904, 'Number of persons with at least one drug era, by drug_concept_id by calendar year by gender by age decile', 'drug_concept_id', 'calendar year', 'gender_concept_id', 'age decile'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name, stratum_2_name) - values (906, 'Distribution of drug era age by drug_concept_id', 'drug_concept_id', 'gender_concept_id'); -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (907, 'Distribution of drug era length, by drug_concept_id', 'drug_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (908, 'Number of drug eras without valid person'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (909, 'Number of drug eras outside valid observation period'); - -insert into @results_database_schema.ACHILLES_analysis 
(analysis_id, analysis_name) - values (910, 'Number of drug eras with end date < start date'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (920, 'Number of drug era records by drug era start month', 'calendar month'); - ---1000- CONDITION_ERA - - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (1000, 'Number of persons with at least one condition era, by condition_concept_id', 'condition_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (1001, 'Number of condition era records, by condition_concept_id', 'condition_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name, stratum_2_name) - values (1002, 'Number of persons by condition era start month, by condition_concept_id', 'condition_concept_id', 'calendar month'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (1003, 'Number of distinct condition era concepts per person'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name, stratum_2_name, stratum_3_name, stratum_4_name) - values (1004, 'Number of persons with at least one condition era, by condition_concept_id by calendar year by gender by age decile', 'condition_concept_id', 'calendar year', 'gender_concept_id', 'age decile'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name, stratum_2_name) - values (1006, 'Distribution of condition era age by condition_concept_id', 'condition_concept_id', 'gender_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (1007, 'Distribution of condition era length, by condition_concept_id', 'condition_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (1008, 'Number of condition eras without valid person'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (1009, 'Number of condition eras outside valid observation period'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (1010, 'Number of condition eras with end date < start date'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (1020, 'Number of condition era records by condition era start month', 'calendar month'); - - - ---1100- LOCATION - - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (1100, 'Number of persons by location 3-digit zip', '3-digit zip'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (1101, 'Number of persons by location state', 'state'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (1102, 'Number of care sites by location 3-digit zip', '3-digit zip'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (1103, 'Number of care sites by location state', 'state'); - - ---1200- CARE_SITE - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (1200, 'Number of persons by place of 
service', 'place_of_service_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (1201, 'Number of visits by place of service', 'place_of_service_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (1202, 'Number of care sites by place of service', 'place_of_service_concept_id'); - - ---1300- ORGANIZATION - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (1300, 'Number of organizations by place of service', 'place_of_service_concept_id'); - - ---1400- PAYOR_PLAN_PERIOD - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (1406, 'Length of payer plan (days) of first payer plan period by gender', 'gender_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (1407, 'Length of payer plan (days) of first payer plan period by age decile', 'age_decile'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (1408, 'Number of persons by length of payer plan period, in 30d increments', 'payer plan period length 30d increments'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (1409, 'Number of persons with continuous payer plan in each year', 'calendar year'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (1410, 'Number of persons with continuous payer plan in each month', 'calendar month'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (1411, 'Number of persons by payer plan period start month', 'calendar month'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (1412, 'Number of persons by payer plan period end month', 'calendar month'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (1413, 'Number of persons by number of payer plan periods', 'number of payer plan periods'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (1414, 'Number of persons with payer plan period before year-of-birth'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (1415, 'Number of persons with payer plan period end < payer plan period start'); - ---1500- DRUG_COST - - - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (1500, 'Number of drug cost records with invalid drug exposure id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (1501, 'Number of drug cost records with invalid payer plan period id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (1502, 'Distribution of paid copay, by drug_concept_id', 'drug_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (1503, 'Distribution of paid coinsurance, by drug_concept_id', 'drug_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (1504, 'Distribution of paid toward 
deductible, by drug_concept_id', 'drug_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (1505, 'Distribution of paid by payer, by drug_concept_id', 'drug_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (1506, 'Distribution of paid by coordination of benefit, by drug_concept_id', 'drug_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (1507, 'Distribution of total out-of-pocket, by drug_concept_id', 'drug_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (1508, 'Distribution of total paid, by drug_concept_id', 'drug_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (1509, 'Distribution of ingredient_cost, by drug_concept_id', 'drug_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (1510, 'Distribution of dispensing fee, by drug_concept_id', 'drug_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (1511, 'Distribution of average wholesale price, by drug_concept_id', 'drug_concept_id'); - - ---1600- PROCEDURE_COST - - - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (1600, 'Number of procedure cost records with invalid procedure occurrence id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (1601, 'Number of procedure cost records with invalid payer plan period id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (1602, 'Distribution of paid copay, by procedure_concept_id', 'procedure_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (1603, 'Distribution of paid coinsurance, by procedure_concept_id', 'procedure_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (1604, 'Distribution of paid toward deductible, by procedure_concept_id', 'procedure_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (1605, 'Distribution of paid by payer, by procedure_concept_id', 'procedure_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (1606, 'Distribution of paid by coordination of benefit, by procedure_concept_id', 'procedure_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (1607, 'Distribution of total out-of-pocket, by procedure_concept_id', 'procedure_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (1608, 'Distribution of total paid, by procedure_concept_id', 'procedure_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (1609, 'Number of records by disease_class_concept_id', 'disease_class_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, 
stratum_1_name) - values (1610, 'Number of records by revenue_code_concept_id', 'revenue_code_concept_id'); - - ---1700- COHORT - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (1700, 'Number of records by cohort_concept_id', 'cohort_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (1701, 'Number of records with cohort end date < cohort start date'); - ---} : {else if not createTable -delete from @results_database_schema.ACHILLES_results where analysis_id IN (@list_of_analysis_ids); -delete from @results_database_schema.ACHILLES_results_dist where analysis_id IN (@list_of_analysis_ids); -} - -/**** -7. generate results for analysis_results - - -****/ - ---{0 IN (@list_of_analysis_ids)}?{ --- 0 Number of persons -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, count_value) -select 0 as analysis_id, '@source_name' as stratum_1, COUNT_BIG(distinct person_id) as count_value -from @cdm_database_schema.PERSON; - -insert into @results_database_schema.ACHILLES_results_dist (analysis_id, stratum_1, count_value) -select 0 as analysis_id, '@source_name' as stratum_1, COUNT_BIG(distinct person_id) as count_value -from @cdm_database_schema.PERSON; - ---} - - -/******************************************** - -ACHILLES Analyses on PERSON table - -*********************************************/ - ---{1 IN (@list_of_analysis_ids)}?{ --- 1 Number of persons -insert into @results_database_schema.ACHILLES_results (analysis_id, count_value) -select 1 as analysis_id, COUNT_BIG(distinct person_id) as count_value -from @cdm_database_schema.PERSON; ---} - - ---{2 IN (@list_of_analysis_ids)}?{ --- 2 Number of persons by gender -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, count_value) -select 2 as analysis_id, gender_concept_id as stratum_1, COUNT_BIG(distinct person_id) as count_value -from @cdm_database_schema.PERSON -group by GENDER_CONCEPT_ID; ---} - - - ---{3 IN (@list_of_analysis_ids)}?{ --- 3 Number of persons by year of birth -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, count_value) -select 3 as analysis_id, year_of_birth as stratum_1, COUNT_BIG(distinct person_id) as count_value -from @cdm_database_schema.PERSON -group by YEAR_OF_BIRTH; ---} - - ---{4 IN (@list_of_analysis_ids)}?{ --- 4 Number of persons by race -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, count_value) -select 4 as analysis_id, RACE_CONCEPT_ID as stratum_1, COUNT_BIG(distinct person_id) as count_value -from @cdm_database_schema.PERSON -group by RACE_CONCEPT_ID; ---} - - - ---{5 IN (@list_of_analysis_ids)}?{ --- 5 Number of persons by ethnicity -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, count_value) -select 5 as analysis_id, ETHNICITY_CONCEPT_ID as stratum_1, COUNT_BIG(distinct person_id) as count_value -from @cdm_database_schema.PERSON -group by ETHNICITY_CONCEPT_ID; ---} - - - - - ---{7 IN (@list_of_analysis_ids)}?{ --- 7 Number of persons with invalid provider_id -insert into @results_database_schema.ACHILLES_results (analysis_id, count_value) -select 7 as analysis_id, COUNT_BIG(p1.person_id) as count_value -from @cdm_database_schema.PERSON p1 - left join @cdm_database_schema.provider pr1 - on p1.provider_id = pr1.provider_id -where p1.provider_id is not null - and pr1.provider_id is null -; ---} - - - ---{8 IN (@list_of_analysis_ids)}?{ --- 8 
Number of persons with invalid location_id -insert into @results_database_schema.ACHILLES_results (analysis_id, count_value) -select 8 as analysis_id, COUNT_BIG(p1.person_id) as count_value -from @cdm_database_schema.PERSON p1 - left join @cdm_database_schema.location l1 - on p1.location_id = l1.location_id -where p1.location_id is not null - and l1.location_id is null -; ---} - - ---{9 IN (@list_of_analysis_ids)}?{ --- 9 Number of persons with invalid care_site_id -insert into @results_database_schema.ACHILLES_results (analysis_id, count_value) -select 9 as analysis_id, COUNT_BIG(p1.person_id) as count_value -from @cdm_database_schema.PERSON p1 - left join @cdm_database_schema.care_site cs1 - on p1.care_site_id = cs1.care_site_id -where p1.care_site_id is not null - and cs1.care_site_id is null -; ---} - - - - - - - -/******************************************** - -ACHILLES Analyses on OBSERVATION_PERIOD table - -*********************************************/ - ---{101 IN (@list_of_analysis_ids)}?{ --- 101 Number of persons by age, with age at first observation period -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, count_value) -select 101 as analysis_id, year(op1.index_date) - p1.YEAR_OF_BIRTH as stratum_1, COUNT_BIG(p1.person_id) as count_value -from @cdm_database_schema.PERSON p1 - inner join (select person_id, MIN(observation_period_start_date) as index_date from @cdm_database_schema.OBSERVATION_PERIOD group by PERSON_ID) op1 - on p1.PERSON_ID = op1.PERSON_ID -group by year(op1.index_date) - p1.YEAR_OF_BIRTH; ---} - - - ---{102 IN (@list_of_analysis_ids)}?{ --- 102 Number of persons by gender by age, with age at first observation period -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, stratum_2, count_value) -select 102 as analysis_id, p1.gender_concept_id as stratum_1, year(op1.index_date) - p1.YEAR_OF_BIRTH as stratum_2, COUNT_BIG(p1.person_id) as count_value -from @cdm_database_schema.PERSON p1 - inner join (select person_id, MIN(observation_period_start_date) as index_date from @cdm_database_schema.OBSERVATION_PERIOD group by PERSON_ID) op1 - on p1.PERSON_ID = op1.PERSON_ID -group by p1.gender_concept_id, year(op1.index_date) - p1.YEAR_OF_BIRTH; ---} - - ---{103 IN (@list_of_analysis_ids)}?{ --- 103 Distribution of age at first observation period -insert into @results_database_schema.ACHILLES_results_dist (analysis_id, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value) -select 103 as analysis_id, - COUNT_BIG(count_value) as count_value, - min(count_value) as min_value, - max(count_value) as max_value, - avg(1.0*count_value) as avg_value, - stdev(count_value) as stdev_value, - max(case when p1<=0.50 then count_value else -9999 end) as median_value, - max(case when p1<=0.10 then count_value else -9999 end) as p10_value, - max(case when p1<=0.25 then count_value else -9999 end) as p25_value, - max(case when p1<=0.75 then count_value else -9999 end) as p75_value, - max(case when p1<=0.90 then count_value else -9999 end) as p90_value -from -( - select year(op1.index_date) - p1.YEAR_OF_BIRTH as count_value, - 1.0*(row_number() over (order by year(op1.index_date) - p1.YEAR_OF_BIRTH))/(COUNT_BIG(*) over () + 1) as p1 - from @cdm_database_schema.PERSON p1 - join - ( - select person_id, MIN(observation_period_start_date) as index_date from @cdm_database_schema.OBSERVATION_PERIOD group by PERSON_ID - ) op1 on p1.PERSON_ID = op1.PERSON_ID -) t1 -; ---} - - - - 
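All of the ACHILLES_results_dist queries in this script (103 just above, 104-107 below, and the later 2xx/4xx/5xx distribution analyses) rely on the same percentile idiom: rank each value with row_number() over an ordering window, turn the rank into an empirical percentile p1 by dividing by COUNT_BIG(*) over () + 1, and then read quantiles off with max(case when p1 <= q then count_value else -9999 end). The sketch below isolates that idiom against a hypothetical #example_values temp table (the table, its value_of_interest column, and the sample rows are illustrative only, not part of Achilles); it is a minimal example in the same SQL Server-style dialect this script is written in before SqlRender translation.

-- Minimal sketch of the percentile pattern behind the *_dist analyses (hypothetical table and data).
IF OBJECT_ID('tempdb..#example_values', 'U') IS NOT NULL
  DROP TABLE #example_values;

CREATE TABLE #example_values (value_of_interest INT);
INSERT INTO #example_values (value_of_interest) VALUES (1), (2), (2), (3), (5), (8), (13);

SELECT COUNT_BIG(count_value) AS count_value,
       MIN(count_value) AS min_value,
       MAX(count_value) AS max_value,
       AVG(1.0 * count_value) AS avg_value,
       STDEV(count_value) AS stdev_value,
       MAX(CASE WHEN p1 <= 0.50 THEN count_value ELSE -9999 END) AS median_value,
       MAX(CASE WHEN p1 <= 0.10 THEN count_value ELSE -9999 END) AS p10_value,
       MAX(CASE WHEN p1 <= 0.90 THEN count_value ELSE -9999 END) AS p90_value
FROM
(
  -- p1 is the empirical percentile of each row: rank / (N + 1)
  SELECT value_of_interest AS count_value,
         1.0 * ROW_NUMBER() OVER (ORDER BY value_of_interest) / (COUNT_BIG(*) OVER () + 1) AS p1
  FROM #example_values
) t1;

DROP TABLE #example_values;

On this seven-row sample the median comes out as 3, while p10 falls back to the -9999 sentinel because no rank lands at or below the 10th percentile; the full queries behave the same way on very small strata. The stratified variants (104, 106, 206, 406, and so on) follow the identical shape, only adding the stratum columns to a PARTITION BY in both the row_number() and COUNT_BIG(*) windows and to the outer GROUP BY.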
---{104 IN (@list_of_analysis_ids)}?{ --- 104 Distribution of age at first observation period by gender -insert into @results_database_schema.ACHILLES_results_dist (analysis_id, stratum_1, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value) -select 104 as analysis_id, - gender_concept_id, - COUNT_BIG(count_value) as count_value, - min(count_value) as min_value, - max(count_value) as max_value, - avg(1.0*count_value) as avg_value, - stdev(count_value) as stdev_value, - max(case when p1<=0.50 then count_value else -9999 end) as median_value, - max(case when p1<=0.10 then count_value else -9999 end) as p10_value, - max(case when p1<=0.25 then count_value else -9999 end) as p25_value, - max(case when p1<=0.75 then count_value else -9999 end) as p75_value, - max(case when p1<=0.90 then count_value else -9999 end) as p90_value -from -( - select p1.gender_concept_id, - year(op1.index_date) - p1.YEAR_OF_BIRTH as count_value, - 1.0*(row_number() over (partition by p1.gender_concept_id order by year(op1.index_date) - p1.YEAR_OF_BIRTH))/(COUNT_BIG(*) over (partition by p1.gender_concept_id)+1) as p1 - from - @cdm_database_schema.PERSON p1 - inner join (select person_id, MIN(observation_period_start_date) as index_date from @cdm_database_schema.OBSERVATION_PERIOD group by PERSON_ID) op1 on p1.PERSON_ID = op1.PERSON_ID -) t1 -group by gender_concept_id -; ---} - ---{105 IN (@list_of_analysis_ids)}?{ --- 105 Length of observation (days) of first observation period -insert into @results_database_schema.ACHILLES_results_dist (analysis_id, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value) -select 105 as analysis_id, - COUNT_BIG(count_value) as count_value, - min(count_value) as min_value, - max(count_value) as max_value, - avg(1.0*count_value) as avg_value, - stdev(count_value) as stdev_value, - max(case when p1<=0.50 then count_value else -9999 end) as median_value, - max(case when p1<=0.10 then count_value else -9999 end) as p10_value, - max(case when p1<=0.25 then count_value else -9999 end) as p25_value, - max(case when p1<=0.75 then count_value else -9999 end) as p75_value, - max(case when p1<=0.90 then count_value else -9999 end) as p90_value -from -( -select DATEDIFF(dd,op1.observation_period_start_date, op1.observation_period_end_date) as count_value, - 1.0*(row_number() over (order by DATEDIFF(dd,op1.observation_period_start_date, op1.observation_period_end_date)))/(COUNT_BIG(*) over() + 1) as p1 -from @cdm_database_schema.PERSON p1 - inner join - (select person_id, - OBSERVATION_PERIOD_START_DATE, - OBSERVATION_PERIOD_END_DATE, - ROW_NUMBER() over (PARTITION by person_id order by observation_period_start_date asc) as rn1 - from @cdm_database_schema.OBSERVATION_PERIOD - ) op1 on p1.PERSON_ID = op1.PERSON_ID - where op1.rn1 = 1 -) t1 -; ---} - - ---{106 IN (@list_of_analysis_ids)}?{ --- 106 Length of observation (days) of first observation period by gender -insert into @results_database_schema.ACHILLES_results_dist (analysis_id, stratum_1, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value) -select 106 as analysis_id, - gender_concept_id as stratum_1, - COUNT_BIG(count_value) as count_value, - min(count_value) as min_value, - max(count_value) as max_value, - avg(1.0*count_value) as avg_value, - stdev(count_value) as stdev_value, - max(case when p1<=0.50 then count_value else -9999 end) as median_value, - 
max(case when p1<=0.10 then count_value else -9999 end) as p10_value, - max(case when p1<=0.25 then count_value else -9999 end) as p25_value, - max(case when p1<=0.75 then count_value else -9999 end) as p75_value, - max(case when p1<=0.90 then count_value else -9999 end) as p90_value -from -( -select p1.gender_concept_id, - DATEDIFF(dd,op1.observation_period_start_date, op1.observation_period_end_date) as count_value, - 1.0*(row_number() over (partition by p1.gender_concept_id order by DATEDIFF(dd,op1.observation_period_start_date, op1.observation_period_end_date)))/(COUNT_BIG(*) over (partition by p1.gender_concept_id) + 1) as p1 -from @cdm_database_schema.PERSON p1 - inner join - (select person_id, - OBSERVATION_PERIOD_START_DATE, - OBSERVATION_PERIOD_END_DATE, - ROW_NUMBER() over (PARTITION by person_id order by observation_period_start_date asc) as rn1 - from @cdm_database_schema.OBSERVATION_PERIOD - ) op1 on p1.PERSON_ID = op1.PERSON_ID - where op1.rn1 = 1 -) t1 -group by gender_concept_id -; ---} - - - ---{107 IN (@list_of_analysis_ids)}?{ --- 107 Length of observation (days) of first observation period by age decile -insert into @results_database_schema.ACHILLES_results_dist (analysis_id, stratum_1, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value) -select 107 as analysis_id, - age_decile as stratum_1, - COUNT_BIG(count_value) as count_value, - min(count_value) as min_value, - max(count_value) as max_value, - avg(1.0*count_value) as avg_value, - stdev(count_value) as stdev_value, - max(case when p1<=0.50 then count_value else -9999 end) as median_value, - max(case when p1<=0.10 then count_value else -9999 end) as p10_value, - max(case when p1<=0.25 then count_value else -9999 end) as p25_value, - max(case when p1<=0.75 then count_value else -9999 end) as p75_value, - max(case when p1<=0.90 then count_value else -9999 end) as p90_value -from -( -select floor((year(op1.OBSERVATION_PERIOD_START_DATE) - p1.YEAR_OF_BIRTH)/10) as age_decile, - DATEDIFF(dd,op1.observation_period_start_date, op1.observation_period_end_date) as count_value, - 1.0*(row_number() over (partition by floor((year(op1.OBSERVATION_PERIOD_START_DATE) - p1.YEAR_OF_BIRTH)/10) order by DATEDIFF(dd,op1.observation_period_start_date, op1.observation_period_end_date)))/(COUNT_BIG(*) over (partition by floor((year(op1.OBSERVATION_PERIOD_START_DATE) - p1.YEAR_OF_BIRTH)/10))+1) as p1 -from @cdm_database_schema.PERSON p1 - inner join - (select person_id, - OBSERVATION_PERIOD_START_DATE, - OBSERVATION_PERIOD_END_DATE, - ROW_NUMBER() over (PARTITION by person_id order by observation_period_start_date asc) as rn1 - from @cdm_database_schema.OBSERVATION_PERIOD - ) op1 on p1.PERSON_ID = op1.PERSON_ID -where op1.rn1 = 1 -) t1 -group by age_decile -; ---} - - ---{108 IN (@list_of_analysis_ids)}?{ --- 108 Number of persons by length of observation period, in 30d increments -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, count_value) -select 108 as analysis_id, floor(DATEDIFF(dd, op1.observation_period_start_date, op1.observation_period_end_date)/30) as stratum_1, COUNT_BIG(distinct p1.person_id) as count_value -from @cdm_database_schema.PERSON p1 - inner join - (select person_id, - OBSERVATION_PERIOD_START_DATE, - OBSERVATION_PERIOD_END_DATE, - ROW_NUMBER() over (PARTITION by person_id order by observation_period_start_date asc) as rn1 - from @cdm_database_schema.OBSERVATION_PERIOD - ) op1 - on p1.PERSON_ID = op1.PERSON_ID - where 
op1.rn1 = 1 -group by floor(DATEDIFF(dd, op1.observation_period_start_date, op1.observation_period_end_date)/30) -; ---} - - - - ---{109 IN (@list_of_analysis_ids)}?{ --- 109 Number of persons with continuous observation in each year --- Note: using temp table instead of nested query because this gives vastly improved performance in Oracle - -IF OBJECT_ID('tempdb..#temp_dates', 'U') IS NOT NULL - DROP TABLE #temp_dates; - -SELECT DISTINCT - YEAR(observation_period_start_date) AS obs_year, - DATEFROMPARTS(YEAR(observation_period_start_date),1,1) AS obs_year_start, - DATEFROMPARTS(YEAR(observation_period_start_date),12,31) AS obs_year_end -INTO - #temp_dates -FROM - @cdm_database_schema.observation_period -; - -INSERT INTO @results_database_schema.ACHILLES_results (analysis_id, stratum_1, count_value) -SELECT - 109 AS analysis_id, - obs_year AS stratum_1, - COUNT_BIG(DISTINCT person_id) AS count_value -FROM - @cdm_database_schema.observation_period, - #temp_dates -WHERE - observation_period_start_date <= obs_year_start - AND - observation_period_end_date >= obs_year_end -GROUP BY - obs_year -; - -TRUNCATE TABLE #temp_dates; -DROP TABLE #temp_dates; ---} - - ---{110 IN (@list_of_analysis_ids)}?{ --- 110 Number of persons with continuous observation in each month --- Note: using temp table instead of nested query because this gives vastly improved performance in Oracle - -IF OBJECT_ID('tempdb..#temp_dates', 'U') IS NOT NULL - DROP TABLE #temp_dates; - -SELECT DISTINCT - YEAR(observation_period_start_date)*100 + MONTH(observation_period_start_date) AS obs_month, - DATEFROMPARTS(YEAR(observation_period_start_date),MONTH(OBSERVATION_PERIOD_START_DATE),1) AS obs_month_start, - EOMONTH(OBSERVATION_PERIOD_START_DATE) AS obs_month_end -INTO - #temp_dates -FROM - @cdm_database_schema.observation_period -; - - -INSERT INTO @results_database_schema.ACHILLES_results (analysis_id, stratum_1, count_value) -SELECT - 110 AS analysis_id, - obs_month AS stratum_1, - COUNT_BIG(DISTINCT person_id) AS count_value -FROM - @cdm_database_schema.observation_period, - #temp_Dates -WHERE - observation_period_start_date <= obs_month_start - AND - observation_period_end_date >= obs_month_end -GROUP BY - obs_month -; - -TRUNCATE TABLE #temp_dates; -DROP TABLE #temp_dates; ---} - - ---{111 IN (@list_of_analysis_ids)}?{ --- 111 Number of persons by observation period start month -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, count_value) -select 111 as analysis_id, - YEAR(observation_period_start_date)*100 + month(OBSERVATION_PERIOD_START_DATE) as stratum_1, - COUNT_BIG(distinct op1.PERSON_ID) as count_value -from - @cdm_database_schema.observation_period op1 -group by YEAR(observation_period_start_date)*100 + month(OBSERVATION_PERIOD_START_DATE) -; ---} - - - ---{112 IN (@list_of_analysis_ids)}?{ --- 112 Number of persons by observation period end month -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, count_value) -select 112 as analysis_id, - YEAR(observation_period_end_date)*100 + month(observation_period_end_date) as stratum_1, - COUNT_BIG(distinct op1.PERSON_ID) as count_value -from - @cdm_database_schema.observation_period op1 -group by YEAR(observation_period_end_date)*100 + month(observation_period_end_date) -; ---} - - ---{113 IN (@list_of_analysis_ids)}?{ --- 113 Number of persons by number of observation periods -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, count_value) -select 113 as analysis_id, - op1.num_periods 
as stratum_1, COUNT_BIG(distinct op1.PERSON_ID) as count_value -from - (select person_id, COUNT_BIG(OBSERVATION_period_start_date) as num_periods from @cdm_database_schema.observation_period group by PERSON_ID) op1 -group by op1.num_periods -; ---} - ---{114 IN (@list_of_analysis_ids)}?{ --- 114 Number of persons with observation period before year-of-birth -insert into @results_database_schema.ACHILLES_results (analysis_id, count_value) -select 114 as analysis_id, - COUNT_BIG(distinct p1.PERSON_ID) as count_value -from - @cdm_database_schema.PERSON p1 - inner join (select person_id, MIN(year(OBSERVATION_period_start_date)) as first_obs_year from @cdm_database_schema.observation_period group by PERSON_ID) op1 - on p1.person_id = op1.person_id -where p1.year_of_birth > op1.first_obs_year -; ---} - ---{115 IN (@list_of_analysis_ids)}?{ --- 115 Number of persons with observation period end < start -insert into @results_database_schema.ACHILLES_results (analysis_id, count_value) -select 115 as analysis_id, - COUNT_BIG(op1.PERSON_ID) as count_value -from - @cdm_database_schema.observation_period op1 -where op1.observation_period_end_date < op1.observation_period_start_date -; ---} - - - ---{116 IN (@list_of_analysis_ids)}?{ --- 116 Number of persons with at least one day of observation in each year by gender and age decile --- Note: using temp table instead of nested query because this gives vastly improved performance in Oracle - -IF OBJECT_ID('tempdb..#temp_dates', 'U') IS NOT NULL - DROP TABLE #temp_dates; - -select distinct - YEAR(observation_period_start_date) as obs_year -INTO - #temp_dates -from - @cdm_database_schema.OBSERVATION_PERIOD -; - -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, stratum_2, stratum_3, count_value) -select 116 as analysis_id, - t1.obs_year as stratum_1, - p1.gender_concept_id as stratum_2, - floor((t1.obs_year - p1.year_of_birth)/10) as stratum_3, - COUNT_BIG(distinct p1.PERSON_ID) as count_value -from - @cdm_database_schema.PERSON p1 - inner join @cdm_database_schema.observation_period op1 - on p1.person_id = op1.person_id - , - #temp_dates t1 -where year(op1.OBSERVATION_PERIOD_START_DATE) <= t1.obs_year - and year(op1.OBSERVATION_PERIOD_END_DATE) >= t1.obs_year -group by t1.obs_year, - p1.gender_concept_id, - floor((t1.obs_year - p1.year_of_birth)/10) -; - -TRUNCATE TABLE #temp_dates; -DROP TABLE #temp_dates; - ---} - - ---{117 IN (@list_of_analysis_ids)}?{ --- 117 Number of persons with at least one day of observation in each year by gender and age decile --- Note: using temp table instead of nested query because this gives vastly improved performance in Oracle - -IF OBJECT_ID('tempdb..#temp_dates', 'U') IS NOT NULL - DROP TABLE #temp_dates; - -select distinct - YEAR(observation_period_start_date)*100 + MONTH(observation_period_start_date) as obs_month -into - #temp_dates -from - @cdm_database_schema.OBSERVATION_PERIOD -; - -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, count_value) -select 117 as analysis_id, - t1.obs_month as stratum_1, - COUNT_BIG(distinct op1.PERSON_ID) as count_value -from - @cdm_database_schema.observation_period op1, - #temp_dates t1 -where YEAR(observation_period_start_date)*100 + MONTH(observation_period_start_date) <= t1.obs_month - and YEAR(observation_period_end_date)*100 + MONTH(observation_period_end_date) >= t1.obs_month -group by t1.obs_month -; - -TRUNCATE TABLE #temp_dates; -DROP TABLE #temp_dates; ---} - - ---{118 IN (@list_of_analysis_ids)}?{ --- 118 Number of 
observation period records with invalid person_id -insert into @results_database_schema.ACHILLES_results (analysis_id, count_value) -select 118 as analysis_id, - COUNT_BIG(op1.PERSON_ID) as count_value -from - @cdm_database_schema.observation_period op1 - left join @cdm_database_schema.PERSON p1 - on p1.person_id = op1.person_id -where p1.person_id is null -; ---} - - -/******************************************** - -ACHILLES Analyses on VISIT_OCCURRENCE table - -*********************************************/ - - ---{200 IN (@list_of_analysis_ids)}?{ --- 200 Number of persons with at least one visit occurrence, by visit_concept_id -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, count_value) -select 200 as analysis_id, - vo1.place_of_service_CONCEPT_ID as stratum_1, - COUNT_BIG(distinct vo1.PERSON_ID) as count_value -from - @cdm_database_schema.visit_occurrence vo1 -group by vo1.place_of_service_CONCEPT_ID -; ---} - - ---{201 IN (@list_of_analysis_ids)}?{ --- 201 Number of visit occurrence records, by visit_concept_id -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, count_value) -select 201 as analysis_id, - vo1.place_of_service_CONCEPT_ID as stratum_1, - COUNT_BIG(vo1.PERSON_ID) as count_value -from - @cdm_database_schema.visit_occurrence vo1 -group by vo1.place_of_service_CONCEPT_ID -; ---} - - - ---{202 IN (@list_of_analysis_ids)}?{ --- 202 Number of persons by visit occurrence start month, by visit_concept_id -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, stratum_2, count_value) -select 202 as analysis_id, - vo1.place_of_service_concept_id as stratum_1, - YEAR(visit_start_date)*100 + month(visit_start_date) as stratum_2, - COUNT_BIG(distinct PERSON_ID) as count_value -from -@cdm_database_schema.visit_occurrence vo1 -group by vo1.place_of_service_concept_id, - YEAR(visit_start_date)*100 + month(visit_start_date) -; ---} - - - ---{203 IN (@list_of_analysis_ids)}?{ --- 203 Number of distinct visit occurrence concepts per person -insert into @results_database_schema.ACHILLES_results_dist (analysis_id, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value) -select 203 as analysis_id, - COUNT_BIG(count_value) as count_value, - min(count_value) as min_value, - max(count_value) as max_value, - avg(1.0*count_value) as avg_value, - stdev(count_value) as stdev_value, - max(case when p1<=0.50 then count_value else -9999 end) as median_value, - max(case when p1<=0.10 then count_value else -9999 end) as p10_value, - max(case when p1<=0.25 then count_value else -9999 end) as p25_value, - max(case when p1<=0.75 then count_value else -9999 end) as p75_value, - max(case when p1<=0.90 then count_value else -9999 end) as p90_value -from -( - select num_visits as count_value, - 1.0*(row_number() over (order by num_visits))/(COUNT_BIG(*) over ()+1) as p1 - from ( - select vo1.person_id, COUNT_BIG(distinct vo1.place_of_service_concept_id) as num_visits - from @cdm_database_schema.visit_occurrence vo1 - group by vo1.person_id - ) t0 -) t1 -; ---} - - - ---{204 IN (@list_of_analysis_ids)}?{ --- 204 Number of persons with at least one visit occurrence, by visit_concept_id by calendar year by gender by age decile -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, stratum_2, stratum_3, stratum_4, count_value) -select 204 as analysis_id, - vo1.place_of_service_concept_id as stratum_1, - YEAR(visit_start_date) as stratum_2, - 
p1.gender_concept_id as stratum_3, - floor((year(visit_start_date) - p1.year_of_birth)/10) as stratum_4, - COUNT_BIG(distinct p1.PERSON_ID) as count_value -from @cdm_database_schema.person p1 -inner join -@cdm_database_schema.visit_occurrence vo1 -on p1.person_id = vo1.person_id -group by vo1.place_of_service_concept_id, - YEAR(visit_start_date), - p1.gender_concept_id, - floor((year(visit_start_date) - p1.year_of_birth)/10) -; ---} - - - - - ---{206 IN (@list_of_analysis_ids)}?{ --- 206 Distribution of age by visit_concept_id -insert into @results_database_schema.ACHILLES_results_dist (analysis_id, stratum_1, stratum_2, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value) -select 206 as analysis_id, - place_of_service_concept_id as stratum_1, - gender_concept_id as stratum_2, - COUNT_BIG(count_value) as count_value, - min(count_value) as min_value, - max(count_value) as max_value, - avg(1.0*count_value) as avg_value, - stdev(count_value) as stdev_value, - max(case when p1<=0.50 then count_value else -9999 end) as median_value, - max(case when p1<=0.10 then count_value else -9999 end) as p10_value, - max(case when p1<=0.25 then count_value else -9999 end) as p25_value, - max(case when p1<=0.75 then count_value else -9999 end) as p75_value, - max(case when p1<=0.90 then count_value else -9999 end) as p90_value -from -( - select vo1.place_of_service_concept_id, - p1.gender_concept_id, - vo1.visit_start_year - p1.year_of_birth as count_value, - 1.0*(row_number() over (partition by vo1.place_of_service_concept_id, p1.gender_concept_id order by vo1.visit_start_year - p1.year_of_birth))/(COUNT_BIG(*) over (partition by vo1.place_of_service_concept_id, p1.gender_concept_id)+1) as p1 - from @cdm_database_schema.person p1 - inner join ( - select person_id, place_of_service_concept_id, min(year(visit_start_date)) as visit_start_year - from @cdm_database_schema.visit_occurrence - group by person_id, place_of_service_concept_id - ) vo1 on p1.person_id = vo1.person_id -) t1 -group by place_of_service_concept_id, gender_concept_id -; ---} - - ---{207 IN (@list_of_analysis_ids)}?{ ---207 Number of visit records with invalid person_id -insert into @results_database_schema.ACHILLES_results (analysis_id, count_value) -select 207 as analysis_id, - COUNT_BIG(vo1.PERSON_ID) as count_value -from - @cdm_database_schema.visit_occurrence vo1 - left join @cdm_database_schema.PERSON p1 - on p1.person_id = vo1.person_id -where p1.person_id is null -; ---} - - ---{208 IN (@list_of_analysis_ids)}?{ ---208 Number of visit records outside valid observation period -insert into @results_database_schema.ACHILLES_results (analysis_id, count_value) -select 208 as analysis_id, - COUNT_BIG(vo1.PERSON_ID) as count_value -from - @cdm_database_schema.visit_occurrence vo1 - left join @cdm_database_schema.observation_period op1 - on op1.person_id = vo1.person_id - and vo1.visit_start_date >= op1.observation_period_start_date - and vo1.visit_start_date <= op1.observation_period_end_date -where op1.person_id is null -; ---} - ---{209 IN (@list_of_analysis_ids)}?{ ---209 Number of visit records with end date < start date -insert into @results_database_schema.ACHILLES_results (analysis_id, count_value) -select 209 as analysis_id, - COUNT_BIG(vo1.PERSON_ID) as count_value -from - @cdm_database_schema.visit_occurrence vo1 -where visit_end_date < visit_start_date -; ---} - ---{210 IN (@list_of_analysis_ids)}?{ ---210 Number of visit records with invalid care_site_id -insert 
into @results_database_schema.ACHILLES_results (analysis_id, count_value) -select 210 as analysis_id, - COUNT_BIG(vo1.PERSON_ID) as count_value -from - @cdm_database_schema.visit_occurrence vo1 - left join @cdm_database_schema.care_site cs1 - on vo1.care_site_id = cs1.care_site_id -where vo1.care_site_id is not null - and cs1.care_site_id is null -; ---} - - ---{211 IN (@list_of_analysis_ids)}?{ --- 211 Distribution of length of stay by visit_concept_id -insert into @results_database_schema.ACHILLES_results_dist (analysis_id, stratum_1, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value) -select 211 as analysis_id, - place_of_service_concept_id as stratum_1, - COUNT_BIG(count_value) as count_value, - min(count_value) as min_value, - max(count_value) as max_value, - avg(1.0*count_value) as avg_value, - stdev(count_value) as stdev_value, - max(case when p1<=0.50 then count_value else -9999 end) as median_value, - max(case when p1<=0.10 then count_value else -9999 end) as p10_value, - max(case when p1<=0.25 then count_value else -9999 end) as p25_value, - max(case when p1<=0.75 then count_value else -9999 end) as p75_value, - max(case when p1<=0.90 then count_value else -9999 end) as p90_value -from -( - select place_of_service_concept_id, count_value, (1.0 * (row_number() over (partition by place_of_service_concept_id order by count_value)) / (Q.total +1)) as p1 - from - ( - select vo1.place_of_service_concept_id, datediff(dd,visit_start_date,visit_end_date) as count_value, pc.total - from @cdm_database_schema.visit_occurrence vo1 - JOIN - ( - select place_of_service_concept_id, COUNT_BIG(*) as total from @cdm_database_schema.visit_occurrence group by PLACE_OF_SERVICE_CONCEPT_ID - ) pc on pc.PLACE_OF_SERVICE_CONCEPT_ID = vo1.PLACE_OF_SERVICE_CONCEPT_ID - ) Q -) t1 -group by place_of_service_concept_id; ---} - - ---{220 IN (@list_of_analysis_ids)}?{ --- 220 Number of visit occurrence records by condition occurrence start month -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, count_value) -select 220 as analysis_id, - YEAR(visit_start_date)*100 + month(visit_start_date) as stratum_1, - COUNT_BIG(PERSON_ID) as count_value -from -@cdm_database_schema.visit_occurrence vo1 -group by YEAR(visit_start_date)*100 + month(visit_start_date) -; ---} - -/******************************************** - -ACHILLES Analyses on PROVIDER table - -*********************************************/ - - ---{300 IN (@list_of_analysis_ids)}?{ --- 300 Number of providers -insert into @results_database_schema.ACHILLES_results (analysis_id, count_value) -select 300 as analysis_id, COUNT_BIG(distinct provider_id) as count_value -from @cdm_database_schema.provider; ---} - - ---{301 IN (@list_of_analysis_ids)}?{ --- 301 Number of providers by specialty concept_id -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, count_value) -select 301 as analysis_id, specialty_concept_id as stratum_1, COUNT_BIG(distinct provider_id) as count_value -from @cdm_database_schema.provider -group by specialty_CONCEPT_ID; ---} - ---{302 IN (@list_of_analysis_ids)}?{ --- 302 Number of providers with invalid care site id -insert into @results_database_schema.ACHILLES_results (analysis_id, count_value) -select 302 as analysis_id, COUNT_BIG(provider_id) as count_value -from @cdm_database_schema.provider p1 - left join @cdm_database_schema.care_site cs1 - on p1.care_site_id = cs1.care_site_id -where p1.care_site_id is not null - 
and cs1.care_site_id is null -; ---} - - - -/******************************************** - -ACHILLES Analyses on CONDITION_OCCURRENCE table - -*********************************************/ - - ---{400 IN (@list_of_analysis_ids)}?{ --- 400 Number of persons with at least one condition occurrence, by condition_concept_id -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, count_value) -select 400 as analysis_id, - co1.condition_CONCEPT_ID as stratum_1, - COUNT_BIG(distinct co1.PERSON_ID) as count_value -from - @cdm_database_schema.condition_occurrence co1 -group by co1.condition_CONCEPT_ID -; ---} - - ---{401 IN (@list_of_analysis_ids)}?{ --- 401 Number of condition occurrence records, by condition_concept_id -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, count_value) -select 401 as analysis_id, - co1.condition_CONCEPT_ID as stratum_1, - COUNT_BIG(co1.PERSON_ID) as count_value -from - @cdm_database_schema.condition_occurrence co1 -group by co1.condition_CONCEPT_ID -; ---} - - - ---{402 IN (@list_of_analysis_ids)}?{ --- 402 Number of persons by condition occurrence start month, by condition_concept_id -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, stratum_2, count_value) -select 402 as analysis_id, - co1.condition_concept_id as stratum_1, - YEAR(condition_start_date)*100 + month(condition_start_date) as stratum_2, - COUNT_BIG(distinct PERSON_ID) as count_value -from -@cdm_database_schema.condition_occurrence co1 -group by co1.condition_concept_id, - YEAR(condition_start_date)*100 + month(condition_start_date) -; ---} - - - ---{403 IN (@list_of_analysis_ids)}?{ --- 403 Number of distinct condition occurrence concepts per person -insert into @results_database_schema.ACHILLES_results_dist (analysis_id, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value) -select 403 as analysis_id, - COUNT_BIG(count_value) as count_value, - min(count_value) as min_value, - max(count_value) as max_value, - avg(1.0*count_value) as avg_value, - stdev(count_value) as stdev_value, - max(case when p1<=0.50 then count_value else -9999 end) as median_value, - max(case when p1<=0.10 then count_value else -9999 end) as p10_value, - max(case when p1<=0.25 then count_value else -9999 end) as p25_value, - max(case when p1<=0.75 then count_value else -9999 end) as p75_value, - max(case when p1<=0.90 then count_value else -9999 end) as p90_value -from -( -select num_conditions as count_value, - 1.0*(row_number() over (order by num_conditions))/(COUNT_BIG(*) over ()+1) as p1 -from - ( - select co1.person_id, COUNT_BIG(distinct co1.condition_concept_id) as num_conditions - from - @cdm_database_schema.condition_occurrence co1 - group by co1.person_id - ) t0 -) t1 -; ---} - - - ---{404 IN (@list_of_analysis_ids)}?{ --- 404 Number of persons with at least one condition occurrence, by condition_concept_id by calendar year by gender by age decile -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, stratum_2, stratum_3, stratum_4, count_value) -select 404 as analysis_id, - co1.condition_concept_id as stratum_1, - YEAR(condition_start_date) as stratum_2, - p1.gender_concept_id as stratum_3, - floor((year(condition_start_date) - p1.year_of_birth)/10) as stratum_4, - COUNT_BIG(distinct p1.PERSON_ID) as count_value -from @cdm_database_schema.person p1 -inner join -@cdm_database_schema.condition_occurrence co1 -on p1.person_id = co1.person_id -group by 
co1.condition_concept_id, - YEAR(condition_start_date), - p1.gender_concept_id, - floor((year(condition_start_date) - p1.year_of_birth)/10) -; ---} - ---{405 IN (@list_of_analysis_ids)}?{ --- 405 Number of condition occurrence records, by condition_concept_id by condition_type_concept_id -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, stratum_2, count_value) -select 405 as analysis_id, - co1.condition_CONCEPT_ID as stratum_1, - co1.condition_type_concept_id as stratum_2, - COUNT_BIG(co1.PERSON_ID) as count_value -from - @cdm_database_schema.condition_occurrence co1 -group by co1.condition_CONCEPT_ID, - co1.condition_type_concept_id -; ---} - - - ---{406 IN (@list_of_analysis_ids)}?{ --- 406 Distribution of age by condition_concept_id -insert into @results_database_schema.ACHILLES_results_dist (analysis_id, stratum_1, stratum_2, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value) -select 406 as analysis_id, - condition_concept_id as stratum_1, - gender_concept_id as stratum_2, - COUNT_BIG(count_value) as count_value, - min(count_value) as min_value, - max(count_value) as max_value, - avg(1.0*count_value) as avg_value, - stdev(count_value) as stdev_value, - max(case when p1<=0.50 then count_value else -9999 end) as median_value, - max(case when p1<=0.10 then count_value else -9999 end) as p10_value, - max(case when p1<=0.25 then count_value else -9999 end) as p25_value, - max(case when p1<=0.75 then count_value else -9999 end) as p75_value, - max(case when p1<=0.90 then count_value else -9999 end) as p90_value -from -( - select co1.condition_concept_id, - p1.gender_concept_id, - co1.condition_start_year - p1.year_of_birth as count_value, - 1.0*(row_number() over (partition by co1.condition_concept_id, p1.gender_concept_id order by co1.condition_start_year - p1.year_of_birth))/(COUNT_BIG(*) over (partition by co1.condition_concept_id, p1.gender_concept_id)+1) as p1 - from @cdm_database_schema.person p1 - inner join ( - select person_id, condition_concept_id, min(year(condition_start_date)) as condition_start_year - from @cdm_database_schema.condition_occurrence - group by person_id, condition_concept_id - ) co1 on p1.person_id = co1.person_id -) t1 -group by condition_concept_id, gender_concept_id -; ---} - - ---{409 IN (@list_of_analysis_ids)}?{ --- 409 Number of condition occurrence records with invalid person_id -insert into @results_database_schema.ACHILLES_results (analysis_id, count_value) -select 409 as analysis_id, - COUNT_BIG(co1.PERSON_ID) as count_value -from - @cdm_database_schema.condition_occurrence co1 - left join @cdm_database_schema.PERSON p1 - on p1.person_id = co1.person_id -where p1.person_id is null -; ---} - - ---{410 IN (@list_of_analysis_ids)}?{ --- 410 Number of condition occurrence records outside valid observation period -insert into @results_database_schema.ACHILLES_results (analysis_id, count_value) -select 410 as analysis_id, - COUNT_BIG(co1.PERSON_ID) as count_value -from - @cdm_database_schema.condition_occurrence co1 - left join @cdm_database_schema.observation_period op1 - on op1.person_id = co1.person_id - and co1.condition_start_date >= op1.observation_period_start_date - and co1.condition_start_date <= op1.observation_period_end_date -where op1.person_id is null -; ---} - - ---{411 IN (@list_of_analysis_ids)}?{ --- 411 Number of condition occurrence records with end date < start date -insert into @results_database_schema.ACHILLES_results (analysis_id, 
count_value) -select 411 as analysis_id, - COUNT_BIG(co1.PERSON_ID) as count_value -from - @cdm_database_schema.condition_occurrence co1 -where co1.condition_end_date < co1.condition_start_date -; ---} - - ---{412 IN (@list_of_analysis_ids)}?{ --- 412 Number of condition occurrence records with invalid provider_id -insert into @results_database_schema.ACHILLES_results (analysis_id, count_value) -select 412 as analysis_id, - COUNT_BIG(co1.PERSON_ID) as count_value -from - @cdm_database_schema.condition_occurrence co1 - left join @cdm_database_schema.provider p1 - on p1.provider_id = co1.associated_provider_id -where co1.associated_provider_id is not null - and p1.provider_id is null -; ---} - ---{413 IN (@list_of_analysis_ids)}?{ --- 413 Number of condition occurrence records with invalid visit_id -insert into @results_database_schema.ACHILLES_results (analysis_id, count_value) -select 413 as analysis_id, - COUNT_BIG(co1.PERSON_ID) as count_value -from - @cdm_database_schema.condition_occurrence co1 - left join @cdm_database_schema.visit_occurrence vo1 - on co1.visit_occurrence_id = vo1.visit_occurrence_id -where co1.visit_occurrence_id is not null - and vo1.visit_occurrence_id is null -; ---} - ---{420 IN (@list_of_analysis_ids)}?{ --- 420 Number of condition occurrence records by condition occurrence start month -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, count_value) -select 420 as analysis_id, - YEAR(condition_start_date)*100 + month(condition_start_date) as stratum_1, - COUNT_BIG(PERSON_ID) as count_value -from -@cdm_database_schema.condition_occurrence co1 -group by YEAR(condition_start_date)*100 + month(condition_start_date) -; ---} - - - -/******************************************** - -ACHILLES Analyses on DEATH table - -*********************************************/ - - - ---{500 IN (@list_of_analysis_ids)}?{ --- 500 Number of persons with death, by cause_of_death_concept_id -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, count_value) -select 500 as analysis_id, - d1.cause_of_death_concept_id as stratum_1, - COUNT_BIG(distinct d1.PERSON_ID) as count_value -from - @cdm_database_schema.death d1 -group by d1.cause_of_death_CONCEPT_ID -; ---} - - ---{501 IN (@list_of_analysis_ids)}?{ --- 501 Number of records of death, by cause_of_death_concept_id -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, count_value) -select 501 as analysis_id, - d1.cause_of_death_concept_id as stratum_1, - COUNT_BIG(d1.PERSON_ID) as count_value -from - @cdm_database_schema.death d1 -group by d1.cause_of_death_CONCEPT_ID -; ---} - - - ---{502 IN (@list_of_analysis_ids)}?{ --- 502 Number of persons by condition occurrence start month -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, count_value) -select 502 as analysis_id, - YEAR(death_date)*100 + month(death_date) as stratum_1, - COUNT_BIG(distinct PERSON_ID) as count_value -from -@cdm_database_schema.death d1 -group by YEAR(death_date)*100 + month(death_date) -; ---} - - - ---{504 IN (@list_of_analysis_ids)}?{ --- 504 Number of persons with a death, by calendar year by gender by age decile -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, stratum_2, stratum_3, count_value) -select 504 as analysis_id, - YEAR(death_date) as stratum_1, - p1.gender_concept_id as stratum_2, - floor((year(death_date) - p1.year_of_birth)/10) as stratum_3, - COUNT_BIG(distinct p1.PERSON_ID) as count_value -from 
@cdm_database_schema.person p1 -inner join -@cdm_database_schema.death d1 -on p1.person_id = d1.person_id -group by YEAR(death_date), - p1.gender_concept_id, - floor((year(death_date) - p1.year_of_birth)/10) -; ---} - ---{505 IN (@list_of_analysis_ids)}?{ --- 505 Number of death records, by death_type_concept_id -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, count_value) -select 505 as analysis_id, - death_type_concept_id as stratum_1, - COUNT_BIG(PERSON_ID) as count_value -from - @cdm_database_schema.death d1 -group by death_type_concept_id -; ---} - - - ---{506 IN (@list_of_analysis_ids)}?{ --- 506 Distribution of age at death, by gender -insert into @results_database_schema.ACHILLES_results_dist (analysis_id, stratum_1, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value) -select 506 as analysis_id, - gender_concept_id as stratum_1, - COUNT_BIG(count_value) as count_value, - min(count_value) as min_value, - max(count_value) as max_value, - avg(1.0*count_value) as avg_value, - stdev(count_value) as stdev_value, - max(case when p1<=0.50 then count_value else -9999 end) as median_value, - max(case when p1<=0.10 then count_value else -9999 end) as p10_value, - max(case when p1<=0.25 then count_value else -9999 end) as p25_value, - max(case when p1<=0.75 then count_value else -9999 end) as p75_value, - max(case when p1<=0.90 then count_value else -9999 end) as p90_value -from -( -select p1.gender_concept_id, - d1.death_year - p1.year_of_birth as count_value, - 1.0*(row_number() over (partition by p1.gender_concept_id order by d1.death_year - p1.year_of_birth))/(COUNT_BIG(*) over (partition by p1.gender_concept_id)+1) as p1 -from @cdm_database_schema.person p1 -inner join -(select person_id, min(year(death_date)) as death_year -from @cdm_database_schema.death -group by person_id -) d1 -on p1.person_id = d1.person_id -) t1 -group by gender_concept_id -; ---} - - - ---{509 IN (@list_of_analysis_ids)}?{ --- 509 Number of death records with invalid person_id -insert into @results_database_schema.ACHILLES_results (analysis_id, count_value) -select 509 as analysis_id, - COUNT_BIG(d1.PERSON_ID) as count_value -from - @cdm_database_schema.death d1 - left join @cdm_database_schema.person p1 - on d1.person_id = p1.person_id -where p1.person_id is null -; ---} - - - ---{510 IN (@list_of_analysis_ids)}?{ --- 510 Number of death records outside valid observation period -insert into @results_database_schema.ACHILLES_results (analysis_id, count_value) -select 510 as analysis_id, - COUNT_BIG(d1.PERSON_ID) as count_value -from - @cdm_database_schema.death d1 - left join @cdm_database_schema.observation_period op1 - on d1.person_id = op1.person_id - and d1.death_date >= op1.observation_period_start_date - and d1.death_date <= op1.observation_period_end_date -where op1.person_id is null -; ---} - - ---{511 IN (@list_of_analysis_ids)}?{ --- 511 Distribution of time from death to last condition -insert into @results_database_schema.ACHILLES_results_dist (analysis_id, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value) -select 511 as analysis_id, - COUNT_BIG(count_value) as count_value, - min(count_value) as min_value, - max(count_value) as max_value, - avg(1.0*count_value) as avg_value, - stdev(count_value) as stdev_value, - max(case when p1<=0.50 then count_value else -9999 end) as median_value, - max(case when p1<=0.10 then count_value else -9999
end) as p10_value, - max(case when p1<=0.25 then count_value else -9999 end) as p25_value, - max(case when p1<=0.75 then count_value else -9999 end) as p75_value, - max(case when p1<=0.90 then count_value else -9999 end) as p90_value -from -( -select datediff(dd,d1.death_date, t0.max_date) as count_value, - 1.0*(row_number() over (order by datediff(dd,d1.death_date, t0.max_date)))/(COUNT_BIG(*) over () + 1) as p1 -from @cdm_database_schema.death d1 - inner join - ( - select person_id, max(condition_start_date) as max_date - from @cdm_database_schema.condition_occurrence - group by person_id - ) t0 on d1.person_id = t0.person_id -) t1 -; ---} - - ---{512 IN (@list_of_analysis_ids)}?{ --- 512 Distribution of time from death to last drug -insert into @results_database_schema.ACHILLES_results_dist (analysis_id, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value) -select 512 as analysis_id, - COUNT_BIG(count_value) as count_value, - min(count_value) as min_value, - max(count_value) as max_value, - avg(1.0*count_value) as avg_value, - stdev(count_value) as stdev_value, - max(case when p1<=0.50 then count_value else -9999 end) as median_value, - max(case when p1<=0.10 then count_value else -9999 end) as p10_value, - max(case when p1<=0.25 then count_value else -9999 end) as p25_value, - max(case when p1<=0.75 then count_value else -9999 end) as p75_value, - max(case when p1<=0.90 then count_value else -9999 end) as p90_value -from -( -select datediff(dd,d1.death_date, t0.max_date) as count_value, - 1.0*(row_number() over (order by datediff(dd,d1.death_date, t0.max_date)))/(COUNT_BIG(*) over ()+1) as p1 -from @cdm_database_schema.death d1 - inner join - ( - select person_id, max(drug_exposure_start_date) as max_date - from @cdm_database_schema.drug_exposure - group by person_id - ) t0 - on d1.person_id = t0.person_id -) t1 -; ---} - - ---{513 IN (@list_of_analysis_ids)}?{ --- 513 Distribution of time from death to last visit -insert into @results_database_schema.ACHILLES_results_dist (analysis_id, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value) -select 513 as analysis_id, - COUNT_BIG(count_value) as count_value, - min(count_value) as min_value, - max(count_value) as max_value, - avg(1.0*count_value) as avg_value, - stdev(count_value) as stdev_value, - max(case when p1<=0.50 then count_value else -9999 end) as median_value, - max(case when p1<=0.10 then count_value else -9999 end) as p10_value, - max(case when p1<=0.25 then count_value else -9999 end) as p25_value, - max(case when p1<=0.75 then count_value else -9999 end) as p75_value, - max(case when p1<=0.90 then count_value else -9999 end) as p90_value -from -( -select datediff(dd,d1.death_date, t0.max_date) as count_value, - 1.0*(row_number() over (order by datediff(dd,d1.death_date, t0.max_date)))/(COUNT_BIG(*) over ()+1) as p1 -from @cdm_database_schema.death d1 - inner join - ( - select person_id, max(visit_start_date) as max_date - from @cdm_database_schema.visit_occurrence - group by person_id - ) t0 - on d1.person_id = t0.person_id -) t1 -; ---} - - ---{514 IN (@list_of_analysis_ids)}?{ --- 514 Distribution of time from death to last procedure -insert into @results_database_schema.ACHILLES_results_dist (analysis_id, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value) -select 514 as analysis_id, - COUNT_BIG(count_value) as count_value, - 
min(count_value) as min_value, - max(count_value) as max_value, - avg(1.0*count_value) as avg_value, - stdev(count_value) as stdev_value, - max(case when p1<=0.50 then count_value else -9999 end) as median_value, - max(case when p1<=0.10 then count_value else -9999 end) as p10_value, - max(case when p1<=0.25 then count_value else -9999 end) as p25_value, - max(case when p1<=0.75 then count_value else -9999 end) as p75_value, - max(case when p1<=0.90 then count_value else -9999 end) as p90_value -from -( -select datediff(dd,d1.death_date, t0.max_date) as count_value, - 1.0*(row_number() over (order by datediff(dd,d1.death_date, t0.max_date)))/(COUNT_BIG(*) over ()+1) as p1 -from @cdm_database_schema.death d1 - inner join - ( - select person_id, max(procedure_date) as max_date - from @cdm_database_schema.procedure_occurrence - group by person_id - ) t0 - on d1.person_id = t0.person_id -) t1 -; ---} - - ---{515 IN (@list_of_analysis_ids)}?{ --- 515 Distribution of time from death to last observation -insert into @results_database_schema.ACHILLES_results_dist (analysis_id, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value) -select 515 as analysis_id, - COUNT_BIG(count_value) as count_value, - min(count_value) as min_value, - max(count_value) as max_value, - avg(1.0*count_value) as avg_value, - stdev(count_value) as stdev_value, - max(case when p1<=0.50 then count_value else -9999 end) as median_value, - max(case when p1<=0.10 then count_value else -9999 end) as p10_value, - max(case when p1<=0.25 then count_value else -9999 end) as p25_value, - max(case when p1<=0.75 then count_value else -9999 end) as p75_value, - max(case when p1<=0.90 then count_value else -9999 end) as p90_value -from -( -select datediff(dd,d1.death_date, t0.max_date) as count_value, - 1.0*(row_number() over (order by datediff(dd,d1.death_date, t0.max_date)))/(COUNT_BIG(*) over ()+1) as p1 -from @cdm_database_schema.death d1 - inner join - ( - select person_id, max(observation_date) as max_date - from @cdm_database_schema.observation - group by person_id - ) t0 - on d1.person_id = t0.person_id -) t1 -; ---} - - - -/******************************************** - -ACHILLES Analyses on PROCEDURE_OCCURRENCE table - -*********************************************/ - - - ---{600 IN (@list_of_analysis_ids)}?{ --- 600 Number of persons with at least one procedure occurrence, by procedure_concept_id -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, count_value) -select 600 as analysis_id, - po1.procedure_CONCEPT_ID as stratum_1, - COUNT_BIG(distinct po1.PERSON_ID) as count_value -from - @cdm_database_schema.procedure_occurrence po1 -group by po1.procedure_CONCEPT_ID -; ---} - - ---{601 IN (@list_of_analysis_ids)}?{ --- 601 Number of procedure occurrence records, by procedure_concept_id -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, count_value) -select 601 as analysis_id, - po1.procedure_CONCEPT_ID as stratum_1, - COUNT_BIG(po1.PERSON_ID) as count_value -from - @cdm_database_schema.procedure_occurrence po1 -group by po1.procedure_CONCEPT_ID -; ---} - - - ---{602 IN (@list_of_analysis_ids)}?{ --- 602 Number of persons by procedure occurrence start month, by procedure_concept_id -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, stratum_2, count_value) -select 602 as analysis_id, - po1.procedure_concept_id as stratum_1, - YEAR(procedure_date)*100 + month(procedure_date) as 
stratum_2, - COUNT_BIG(distinct PERSON_ID) as count_value -from -@cdm_database_schema.procedure_occurrence po1 -group by po1.procedure_concept_id, - YEAR(procedure_date)*100 + month(procedure_date) -; ---} - - - ---{603 IN (@list_of_analysis_ids)}?{ --- 603 Number of distinct procedure occurrence concepts per person -insert into @results_database_schema.ACHILLES_results_dist (analysis_id, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value) -select 603 as analysis_id, - COUNT_BIG(count_value) as count_value, - min(count_value) as min_value, - max(count_value) as max_value, - avg(1.0*count_value) as avg_value, - stdev(count_value) as stdev_value, - max(case when p1<=0.50 then count_value else -9999 end) as median_value, - max(case when p1<=0.10 then count_value else -9999 end) as p10_value, - max(case when p1<=0.25 then count_value else -9999 end) as p25_value, - max(case when p1<=0.75 then count_value else -9999 end) as p75_value, - max(case when p1<=0.90 then count_value else -9999 end) as p90_value -from -( -select num_procedures as count_value, - 1.0*(row_number() over (order by num_procedures))/(COUNT_BIG(*) over ()+1) as p1 -from - ( - select po1.person_id, COUNT_BIG(distinct po1.procedure_concept_id) as num_procedures - from - @cdm_database_schema.procedure_occurrence po1 - group by po1.person_id - ) t0 -) t1 -; ---} - - - ---{604 IN (@list_of_analysis_ids)}?{ --- 604 Number of persons with at least one procedure occurrence, by procedure_concept_id by calendar year by gender by age decile -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, stratum_2, stratum_3, stratum_4, count_value) -select 604 as analysis_id, - po1.procedure_concept_id as stratum_1, - YEAR(procedure_date) as stratum_2, - p1.gender_concept_id as stratum_3, - floor((year(procedure_date) - p1.year_of_birth)/10) as stratum_4, - COUNT_BIG(distinct p1.PERSON_ID) as count_value -from @cdm_database_schema.person p1 -inner join -@cdm_database_schema.procedure_occurrence po1 -on p1.person_id = po1.person_id -group by po1.procedure_concept_id, - YEAR(procedure_date), - p1.gender_concept_id, - floor((year(procedure_date) - p1.year_of_birth)/10) -; ---} - ---{605 IN (@list_of_analysis_ids)}?{ --- 605 Number of procedure occurrence records, by procedure_concept_id by procedure_type_concept_id -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, stratum_2, count_value) -select 605 as analysis_id, - po1.procedure_CONCEPT_ID as stratum_1, - po1.procedure_type_concept_id as stratum_2, - COUNT_BIG(po1.PERSON_ID) as count_value -from - @cdm_database_schema.procedure_occurrence po1 -group by po1.procedure_CONCEPT_ID, - po1.procedure_type_concept_id -; ---} - - - ---{606 IN (@list_of_analysis_ids)}?{ --- 606 Distribution of age by procedure_concept_id -insert into @results_database_schema.ACHILLES_results_dist (analysis_id, stratum_1, stratum_2, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value) -select 606 as analysis_id, - procedure_concept_id as stratum_1, - gender_concept_id as stratum_2, - COUNT_BIG(count_value) as count_value, - min(count_value) as min_value, - max(count_value) as max_value, - avg(1.0*count_value) as avg_value, - stdev(count_value) as stdev_value, - max(case when p1<=0.50 then count_value else -9999 end) as median_value, - max(case when p1<=0.10 then count_value else -9999 end) as p10_value, - max(case when p1<=0.25 then 
count_value else -9999 end) as p25_value, - max(case when p1<=0.75 then count_value else -9999 end) as p75_value, - max(case when p1<=0.90 then count_value else -9999 end) as p90_value -from -( -select po1.procedure_concept_id, - p1.gender_concept_id, - po1.procedure_start_year - p1.year_of_birth as count_value, - 1.0*(row_number() over (partition by po1.procedure_concept_id, p1.gender_concept_id order by po1.procedure_start_year - p1.year_of_birth))/(COUNT_BIG(*) over (partition by po1.procedure_concept_id, p1.gender_concept_id)+1) as p1 -from @cdm_database_schema.person p1 -inner join -(select person_id, procedure_concept_id, min(year(procedure_date)) as procedure_start_year -from @cdm_database_schema.procedure_occurrence -group by person_id, procedure_concept_id -) po1 -on p1.person_id = po1.person_id -) t1 -group by procedure_concept_id, gender_concept_id -; ---} - - - - - - - ---{609 IN (@list_of_analysis_ids)}?{ --- 609 Number of procedure occurrence records with invalid person_id -insert into @results_database_schema.ACHILLES_results (analysis_id, count_value) -select 609 as analysis_id, - COUNT_BIG(po1.PERSON_ID) as count_value -from - @cdm_database_schema.procedure_occurrence po1 - left join @cdm_database_schema.PERSON p1 - on p1.person_id = po1.person_id -where p1.person_id is null -; ---} - - ---{610 IN (@list_of_analysis_ids)}?{ --- 610 Number of procedure occurrence records outside valid observation period -insert into @results_database_schema.ACHILLES_results (analysis_id, count_value) -select 610 as analysis_id, - COUNT_BIG(po1.PERSON_ID) as count_value -from - @cdm_database_schema.procedure_occurrence po1 - left join @cdm_database_schema.observation_period op1 - on op1.person_id = po1.person_id - and po1.procedure_date >= op1.observation_period_start_date - and po1.procedure_date <= op1.observation_period_end_date -where op1.person_id is null -; ---} - - - ---{612 IN (@list_of_analysis_ids)}?{ --- 612 Number of procedure occurrence records with invalid provider_id -insert into @results_database_schema.ACHILLES_results (analysis_id, count_value) -select 612 as analysis_id, - COUNT_BIG(po1.PERSON_ID) as count_value -from - @cdm_database_schema.procedure_occurrence po1 - left join @cdm_database_schema.provider p1 - on p1.provider_id = po1.associated_provider_id -where po1.associated_provider_id is not null - and p1.provider_id is null -; ---} - ---{613 IN (@list_of_analysis_ids)}?{ --- 613 Number of procedure occurrence records with invalid visit_id -insert into @results_database_schema.ACHILLES_results (analysis_id, count_value) -select 613 as analysis_id, - COUNT_BIG(po1.PERSON_ID) as count_value -from - @cdm_database_schema.procedure_occurrence po1 - left join @cdm_database_schema.visit_occurrence vo1 - on po1.visit_occurrence_id = vo1.visit_occurrence_id -where po1.visit_occurrence_id is not null - and vo1.visit_occurrence_id is null -; ---} - - ---{620 IN (@list_of_analysis_ids)}?{ --- 620 Number of procedure occurrence records by procedure occurrence start month -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, count_value) -select 620 as analysis_id, - YEAR(procedure_date)*100 + month(procedure_date) as stratum_1, - COUNT_BIG(PERSON_ID) as count_value -from -@cdm_database_schema.procedure_occurrence po1 -group by YEAR(procedure_date)*100 + month(procedure_date) -; ---} - - -/******************************************** - -ACHILLES Analyses on DRUG_EXPOSURE table - -*********************************************/ - - - - ---{700 IN
(@list_of_analysis_ids)}?{ --- 700 Number of persons with at least one drug occurrence, by drug_concept_id -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, count_value) -select 700 as analysis_id, - de1.drug_CONCEPT_ID as stratum_1, - COUNT_BIG(distinct de1.PERSON_ID) as count_value -from - @cdm_database_schema.drug_exposure de1 -group by de1.drug_CONCEPT_ID -; ---} - - ---{701 IN (@list_of_analysis_ids)}?{ --- 701 Number of drug occurrence records, by drug_concept_id -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, count_value) -select 701 as analysis_id, - de1.drug_CONCEPT_ID as stratum_1, - COUNT_BIG(de1.PERSON_ID) as count_value -from - @cdm_database_schema.drug_exposure de1 -group by de1.drug_CONCEPT_ID -; ---} - - - ---{702 IN (@list_of_analysis_ids)}?{ --- 702 Number of persons by drug occurrence start month, by drug_concept_id -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, stratum_2, count_value) -select 702 as analysis_id, - de1.drug_concept_id as stratum_1, - YEAR(drug_exposure_start_date)*100 + month(drug_exposure_start_date) as stratum_2, - COUNT_BIG(distinct PERSON_ID) as count_value -from -@cdm_database_schema.drug_exposure de1 -group by de1.drug_concept_id, - YEAR(drug_exposure_start_date)*100 + month(drug_exposure_start_date) -; ---} - - - ---{703 IN (@list_of_analysis_ids)}?{ --- 703 Number of distinct drug exposure concepts per person -insert into @results_database_schema.ACHILLES_results_dist (analysis_id, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value) -select 703 as analysis_id, - COUNT_BIG(count_value) as count_value, - min(count_value) as min_value, - max(count_value) as max_value, - avg(1.0*count_value) as avg_value, - stdev(count_value) as stdev_value, - max(case when p1<=0.50 then count_value else -9999 end) as median_value, - max(case when p1<=0.10 then count_value else -9999 end) as p10_value, - max(case when p1<=0.25 then count_value else -9999 end) as p25_value, - max(case when p1<=0.75 then count_value else -9999 end) as p75_value, - max(case when p1<=0.90 then count_value else -9999 end) as p90_value -from -( -select num_drugs as count_value, - 1.0*(row_number() over (order by num_drugs))/(COUNT_BIG(*) over ()+1) as p1 -from - ( - select de1.person_id, COUNT_BIG(distinct de1.drug_concept_id) as num_drugs - from - @cdm_database_schema.drug_exposure de1 - group by de1.person_id - ) t0 -) t1 -; ---} - - - ---{704 IN (@list_of_analysis_ids)}?{ --- 704 Number of persons with at least one drug occurrence, by drug_concept_id by calendar year by gender by age decile -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, stratum_2, stratum_3, stratum_4, count_value) -select 704 as analysis_id, - de1.drug_concept_id as stratum_1, - YEAR(drug_exposure_start_date) as stratum_2, - p1.gender_concept_id as stratum_3, - floor((year(drug_exposure_start_date) - p1.year_of_birth)/10) as stratum_4, - COUNT_BIG(distinct p1.PERSON_ID) as count_value -from @cdm_database_schema.person p1 -inner join -@cdm_database_schema.drug_exposure de1 -on p1.person_id = de1.person_id -group by de1.drug_concept_id, - YEAR(drug_exposure_start_date), - p1.gender_concept_id, - floor((year(drug_exposure_start_date) - p1.year_of_birth)/10) -; ---} - ---{705 IN (@list_of_analysis_ids)}?{ --- 705 Number of drug occurrence records, by drug_concept_id by drug_type_concept_id -insert into 
@results_database_schema.ACHILLES_results (analysis_id, stratum_1, stratum_2, count_value) -select 705 as analysis_id, - de1.drug_CONCEPT_ID as stratum_1, - de1.drug_type_concept_id as stratum_2, - COUNT_BIG(de1.PERSON_ID) as count_value -from - @cdm_database_schema.drug_exposure de1 -group by de1.drug_CONCEPT_ID, - de1.drug_type_concept_id -; ---} - - - ---{706 IN (@list_of_analysis_ids)}?{ --- 706 Distribution of age by drug_concept_id -insert into @results_database_schema.ACHILLES_results_dist (analysis_id, stratum_1, stratum_2, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value) -select 706 as analysis_id, - drug_concept_id as stratum_1, - gender_concept_id as stratum_2, - COUNT_BIG(count_value) as count_value, - min(count_value) as min_value, - max(count_value) as max_value, - avg(1.0*count_value) as avg_value, - stdev(count_value) as stdev_value, - max(case when p1<=0.50 then count_value else -9999 end) as median_value, - max(case when p1<=0.10 then count_value else -9999 end) as p10_value, - max(case when p1<=0.25 then count_value else -9999 end) as p25_value, - max(case when p1<=0.75 then count_value else -9999 end) as p75_value, - max(case when p1<=0.90 then count_value else -9999 end) as p90_value -from -( -select de1.drug_concept_id, - p1.gender_concept_id, - de1.drug_start_year - p1.year_of_birth as count_value, - 1.0*(row_number() over (partition by de1.drug_concept_id, p1.gender_concept_id order by de1.drug_start_year - p1.year_of_birth))/(COUNT_BIG(*) over (partition by de1.drug_concept_id, p1.gender_concept_id)+1) as p1 -from @cdm_database_schema.person p1 -inner join -(select person_id, drug_concept_id, min(year(drug_exposure_start_date)) as drug_start_year -from @cdm_database_schema.drug_exposure -group by person_id, drug_concept_id -) de1 -on p1.person_id = de1.person_id -) t1 -group by drug_concept_id, gender_concept_id -; ---} - - - - ---{709 IN (@list_of_analysis_ids)}?{ --- 709 Number of drug exposure records with invalid person_id -insert into @results_database_schema.ACHILLES_results (analysis_id, count_value) -select 709 as analysis_id, - COUNT_BIG(de1.PERSON_ID) as count_value -from - @cdm_database_schema.drug_exposure de1 - left join @cdm_database_schema.PERSON p1 - on p1.person_id = de1.person_id -where p1.person_id is null -; ---} - - ---{710 IN (@list_of_analysis_ids)}?{ --- 710 Number of drug exposure records outside valid observation period -insert into @results_database_schema.ACHILLES_results (analysis_id, count_value) -select 710 as analysis_id, - COUNT_BIG(de1.PERSON_ID) as count_value -from - @cdm_database_schema.drug_exposure de1 - left join @cdm_database_schema.observation_period op1 - on op1.person_id = de1.person_id - and de1.drug_exposure_start_date >= op1.observation_period_start_date - and de1.drug_exposure_start_date <= op1.observation_period_end_date -where op1.person_id is null -; ---} - - ---{711 IN (@list_of_analysis_ids)}?{ --- 711 Number of drug exposure records with end date < start date -insert into @results_database_schema.ACHILLES_results (analysis_id, count_value) -select 711 as analysis_id, - COUNT_BIG(de1.PERSON_ID) as count_value -from - @cdm_database_schema.drug_exposure de1 -where de1.drug_exposure_end_date < de1.drug_exposure_start_date -; ---} - - ---{712 IN (@list_of_analysis_ids)}?{ --- 712 Number of drug exposure records with invalid provider_id -insert into @results_database_schema.ACHILLES_results (analysis_id, count_value) -select 712 as analysis_id, - 
COUNT_BIG(de1.PERSON_ID) as count_value -from - @cdm_database_schema.drug_exposure de1 - left join @cdm_database_schema.provider p1 - on p1.provider_id = de1.prescribing_provider_id -where de1.prescribing_provider_id is not null - and p1.provider_id is null -; ---} - ---{713 IN (@list_of_analysis_ids)}?{ --- 713 Number of drug exposure records with invalid visit_id -insert into @results_database_schema.ACHILLES_results (analysis_id, count_value) -select 713 as analysis_id, - COUNT_BIG(de1.PERSON_ID) as count_value -from - @cdm_database_schema.drug_exposure de1 - left join @cdm_database_schema.visit_occurrence vo1 - on de1.visit_occurrence_id = vo1.visit_occurrence_id -where de1.visit_occurrence_id is not null - and vo1.visit_occurrence_id is null -; ---} - - - ---{715 IN (@list_of_analysis_ids)}?{ --- 715 Distribution of days_supply by drug_concept_id -insert into @results_database_schema.ACHILLES_results_dist (analysis_id, stratum_1, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value) -select 715 as analysis_id, - drug_concept_id as stratum_1, - COUNT_BIG(count_value) as count_value, - min(count_value) as min_value, - max(count_value) as max_value, - avg(1.0*count_value) as avg_value, - stdev(count_value) as stdev_value, - max(case when p1<=0.50 then count_value else -9999 end) as median_value, - max(case when p1<=0.10 then count_value else -9999 end) as p10_value, - max(case when p1<=0.25 then count_value else -9999 end) as p25_value, - max(case when p1<=0.75 then count_value else -9999 end) as p75_value, - max(case when p1<=0.90 then count_value else -9999 end) as p90_value -from -( -select drug_concept_id, - days_supply as count_value, - 1.0*(row_number() over (partition by drug_concept_id order by days_supply))/(COUNT_BIG(*) over (partition by drug_concept_id)+1) as p1 -from (select * from @cdm_database_schema.drug_exposure where days_supply is not null) de1 - -) t1 -group by drug_concept_id -; ---} - - - ---{716 IN (@list_of_analysis_ids)}?{ --- 716 Distribution of refills by drug_concept_id -insert into @results_database_schema.ACHILLES_results_dist (analysis_id, stratum_1, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value) -select 716 as analysis_id, - drug_concept_id as stratum_1, - COUNT_BIG(count_value) as count_value, - min(count_value) as min_value, - max(count_value) as max_value, - avg(1.0*count_value) as avg_value, - stdev(count_value) as stdev_value, - max(case when p1<=0.50 then count_value else -9999 end) as median_value, - max(case when p1<=0.10 then count_value else -9999 end) as p10_value, - max(case when p1<=0.25 then count_value else -9999 end) as p25_value, - max(case when p1<=0.75 then count_value else -9999 end) as p75_value, - max(case when p1<=0.90 then count_value else -9999 end) as p90_value -from -( -select drug_concept_id, - refills as count_value, - 1.0*(row_number() over (partition by drug_concept_id order by refills))/(COUNT_BIG(*) over (partition by drug_concept_id)+1) as p1 -from (select * from @cdm_database_schema.drug_exposure where refills is not null) de1 -) t1 -group by drug_concept_id -; ---} - - - - - ---{717 IN (@list_of_analysis_ids)}?{ --- 717 Distribution of quantity by drug_concept_id -insert into @results_database_schema.ACHILLES_results_dist (analysis_id, stratum_1, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value) -select 717 as analysis_id, - 
drug_concept_id as stratum_1, - COUNT_BIG(count_value) as count_value, - min(count_value) as min_value, - max(count_value) as max_value, - avg(1.0*count_value) as avg_value, - stdev(count_value) as stdev_value, - max(case when p1<=0.50 then count_value else -9999 end) as median_value, - max(case when p1<=0.10 then count_value else -9999 end) as p10_value, - max(case when p1<=0.25 then count_value else -9999 end) as p25_value, - max(case when p1<=0.75 then count_value else -9999 end) as p75_value, - max(case when p1<=0.90 then count_value else -9999 end) as p90_value -from -( -select drug_concept_id, - quantity as count_value, - 1.0*(row_number() over (partition by drug_concept_id order by quantity))/(COUNT_BIG(*) over (partition by drug_concept_id)+1) as p1 -from (select * from @cdm_database_schema.drug_exposure where quantity is not null) de1 -) t1 -group by drug_concept_id -; ---} - - ---{720 IN (@list_of_analysis_ids)}?{ --- 720 Number of drug exposure records by drug exposure start month -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, count_value) -select 720 as analysis_id, - YEAR(drug_exposure_start_date)*100 + month(drug_exposure_start_date) as stratum_1, - COUNT_BIG(PERSON_ID) as count_value -from -@cdm_database_schema.drug_exposure de1 -group by YEAR(drug_exposure_start_date)*100 + month(drug_exposure_start_date) -; ---} - -/******************************************** - -ACHILLES Analyses on OBSERVATION table - -*********************************************/ - - - ---{800 IN (@list_of_analysis_ids)}?{ --- 800 Number of persons with at least one observation occurrence, by observation_concept_id -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, count_value) -select 800 as analysis_id, - o1.observation_CONCEPT_ID as stratum_1, - COUNT_BIG(distinct o1.PERSON_ID) as count_value -from - @cdm_database_schema.observation o1 -group by o1.observation_CONCEPT_ID -; ---} - - ---{801 IN (@list_of_analysis_ids)}?{ --- 801 Number of observation occurrence records, by observation_concept_id -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, count_value) -select 801 as analysis_id, - o1.observation_CONCEPT_ID as stratum_1, - COUNT_BIG(o1.PERSON_ID) as count_value -from - @cdm_database_schema.observation o1 -group by o1.observation_CONCEPT_ID -; ---} - - - ---{802 IN (@list_of_analysis_ids)}?{ --- 802 Number of persons by observation occurrence start month, by observation_concept_id -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, stratum_2, count_value) -select 802 as analysis_id, - o1.observation_concept_id as stratum_1, - YEAR(observation_date)*100 + month(observation_date) as stratum_2, - COUNT_BIG(distinct PERSON_ID) as count_value -from -@cdm_database_schema.observation o1 -group by o1.observation_concept_id, - YEAR(observation_date)*100 + month(observation_date) -; ---} - - - ---{803 IN (@list_of_analysis_ids)}?{ --- 803 Number of distinct observation occurrence concepts per person -insert into @results_database_schema.ACHILLES_results_dist (analysis_id, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value) -select 803 as analysis_id, - COUNT_BIG(count_value) as count_value, - min(count_value) as min_value, - max(count_value) as max_value, - avg(1.0*count_value) as avg_value, - stdev(count_value) as stdev_value, - max(case when p1<=0.50 then count_value else -9999 end) as median_value, - max(case when
p1<=0.10 then count_value else -9999 end) as p10_value, - max(case when p1<=0.25 then count_value else -9999 end) as p25_value, - max(case when p1<=0.75 then count_value else -9999 end) as p75_value, - max(case when p1<=0.90 then count_value else -9999 end) as p90_value -from -( -select num_observations as count_value, - 1.0*(row_number() over (order by num_observations))/(COUNT_BIG(*) over ()+1) as p1 -from - ( - select o1.person_id, COUNT_BIG(distinct o1.observation_concept_id) as num_observations - from - @cdm_database_schema.observation o1 - group by o1.person_id - ) t0 -) t1 -; ---} - - - ---{804 IN (@list_of_analysis_ids)}?{ --- 804 Number of persons with at least one observation occurrence, by observation_concept_id by calendar year by gender by age decile -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, stratum_2, stratum_3, stratum_4, count_value) -select 804 as analysis_id, - o1.observation_concept_id as stratum_1, - YEAR(observation_date) as stratum_2, - p1.gender_concept_id as stratum_3, - floor((year(observation_date) - p1.year_of_birth)/10) as stratum_4, - COUNT_BIG(distinct p1.PERSON_ID) as count_value -from @cdm_database_schema.person p1 -inner join -@cdm_database_schema.observation o1 -on p1.person_id = o1.person_id -group by o1.observation_concept_id, - YEAR(observation_date), - p1.gender_concept_id, - floor((year(observation_date) - p1.year_of_birth)/10) -; ---} - ---{805 IN (@list_of_analysis_ids)}?{ --- 805 Number of observation occurrence records, by observation_concept_id by observation_type_concept_id -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, stratum_2, count_value) -select 805 as analysis_id, - o1.observation_CONCEPT_ID as stratum_1, - o1.observation_type_concept_id as stratum_2, - COUNT_BIG(o1.PERSON_ID) as count_value -from - @cdm_database_schema.observation o1 -group by o1.observation_CONCEPT_ID, - o1.observation_type_concept_id -; ---} - - - ---{806 IN (@list_of_analysis_ids)}?{ --- 806 Distribution of age by observation_concept_id -insert into @results_database_schema.ACHILLES_results_dist (analysis_id, stratum_1, stratum_2, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value) -select 806 as analysis_id, - observation_concept_id as stratum_1, - gender_concept_id as stratum_2, - COUNT_BIG(count_value) as count_value, - min(count_value) as min_value, - max(count_value) as max_value, - avg(1.0*count_value) as avg_value, - stdev(count_value) as stdev_value, - max(case when p1<=0.50 then count_value else -9999 end) as median_value, - max(case when p1<=0.10 then count_value else -9999 end) as p10_value, - max(case when p1<=0.25 then count_value else -9999 end) as p25_value, - max(case when p1<=0.75 then count_value else -9999 end) as p75_value, - max(case when p1<=0.90 then count_value else -9999 end) as p90_value -from -( -select o1.observation_concept_id, - p1.gender_concept_id, - o1.observation_start_year - p1.year_of_birth as count_value, - 1.0*(row_number() over (partition by o1.observation_concept_id, p1.gender_concept_id order by o1.observation_start_year - p1.year_of_birth))/(COUNT_BIG(*) over (partition by o1.observation_concept_id, p1.gender_concept_id)+1) as p1 -from @cdm_database_schema.person p1 -inner join -(select person_id, observation_concept_id, min(year(observation_date)) as observation_start_year -from @cdm_database_schema.observation -group by person_id, observation_concept_id -) o1 -on p1.person_id = o1.person_id 
-) t1 -group by observation_concept_id, gender_concept_id -; ---} - ---{807 IN (@list_of_analysis_ids)}?{ --- 807 Number of observation occurrence records, by observation_concept_id and unit_concept_id -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, stratum_2, count_value) -select 807 as analysis_id, - o1.observation_CONCEPT_ID as stratum_1, - o1.unit_concept_id as stratum_2, - COUNT_BIG(o1.PERSON_ID) as count_value -from - @cdm_database_schema.observation o1 -group by o1.observation_CONCEPT_ID, - o1.unit_concept_id -; ---} - - - - - ---{809 IN (@list_of_analysis_ids)}?{ --- 809 Number of observation records with invalid person_id -insert into @results_database_schema.ACHILLES_results (analysis_id, count_value) -select 809 as analysis_id, - COUNT_BIG(o1.PERSON_ID) as count_value -from - @cdm_database_schema.observation o1 - left join @cdm_database_schema.PERSON p1 - on p1.person_id = o1.person_id -where p1.person_id is null -; ---} - - ---{810 IN (@list_of_analysis_ids)}?{ --- 810 Number of observation records outside valid observation period -insert into @results_database_schema.ACHILLES_results (analysis_id, count_value) -select 810 as analysis_id, - COUNT_BIG(o1.PERSON_ID) as count_value -from - @cdm_database_schema.observation o1 - left join @cdm_database_schema.observation_period op1 - on op1.person_id = o1.person_id - and o1.observation_date >= op1.observation_period_start_date - and o1.observation_date <= op1.observation_period_end_date -where op1.person_id is null -; ---} - - - ---{812 IN (@list_of_analysis_ids)}?{ --- 812 Number of observation records with invalid provider_id -insert into @results_database_schema.ACHILLES_results (analysis_id, count_value) -select 812 as analysis_id, - COUNT_BIG(o1.PERSON_ID) as count_value -from - @cdm_database_schema.observation o1 - left join @cdm_database_schema.provider p1 - on p1.provider_id = o1.associated_provider_id -where o1.associated_provider_id is not null - and p1.provider_id is null -; ---} - ---{813 IN (@list_of_analysis_ids)}?{ --- 813 Number of observation records with invalid visit_id -insert into @results_database_schema.ACHILLES_results (analysis_id, count_value) -select 813 as analysis_id, - COUNT_BIG(o1.PERSON_ID) as count_value -from - @cdm_database_schema.observation o1 - left join @cdm_database_schema.visit_occurrence vo1 - on o1.visit_occurrence_id = vo1.visit_occurrence_id -where o1.visit_occurrence_id is not null - and vo1.visit_occurrence_id is null -; ---} - - ---{814 IN (@list_of_analysis_ids)}?{ --- 814 Number of observation records with no value (numeric, string, or concept) -insert into @results_database_schema.ACHILLES_results (analysis_id, count_value) -select 814 as analysis_id, - COUNT_BIG(o1.PERSON_ID) as count_value -from - @cdm_database_schema.observation o1 -where o1.value_as_number is null - and o1.value_as_string is null - and o1.value_as_concept_id is null -; ---} - - ---{815 IN (@list_of_analysis_ids)}?{ --- 815 Distribution of numeric values, by observation_concept_id and unit_concept_id -insert into @results_database_schema.ACHILLES_results_dist (analysis_id, stratum_1, stratum_2, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value) -select 815 as analysis_id, - observation_concept_id as stratum_1, - unit_concept_id as stratum_2, - COUNT_BIG(count_value) as count_value, - min(count_value) as min_value, - max(count_value) as max_value, - avg(1.0*count_value) as avg_value, - stdev(count_value) as 
stdev_value, - max(case when p1<=0.50 then count_value else -9999 end) as median_value, - max(case when p1<=0.10 then count_value else -9999 end) as p10_value, - max(case when p1<=0.25 then count_value else -9999 end) as p25_value, - max(case when p1<=0.75 then count_value else -9999 end) as p75_value, - max(case when p1<=0.90 then count_value else -9999 end) as p90_value -from -( -select observation_concept_id, unit_concept_id, - value_as_number as count_value, - 1.0*(row_number() over (partition by observation_concept_id, unit_concept_id order by value_as_number))/(COUNT_BIG(*) over (partition by observation_concept_id, unit_concept_id)+1) as p1 -from @cdm_database_schema.observation o1 -where o1.unit_concept_id is not null - and o1.value_as_number is not null -) t1 -group by observation_concept_id, unit_concept_id -; ---} - - ---{816 IN (@list_of_analysis_ids)}?{ --- 816 Distribution of low range, by observation_concept_id and unit_concept_id -insert into @results_database_schema.ACHILLES_results_dist (analysis_id, stratum_1, stratum_2, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value) -select 816 as analysis_id, - observation_concept_id as stratum_1, - unit_concept_id as stratum_2, - COUNT_BIG(count_value) as count_value, - min(count_value) as min_value, - max(count_value) as max_value, - avg(1.0*count_value) as avg_value, - stdev(count_value) as stdev_value, - max(case when p1<=0.50 then count_value else -9999 end) as median_value, - max(case when p1<=0.10 then count_value else -9999 end) as p10_value, - max(case when p1<=0.25 then count_value else -9999 end) as p25_value, - max(case when p1<=0.75 then count_value else -9999 end) as p75_value, - max(case when p1<=0.90 then count_value else -9999 end) as p90_value -from -( -select observation_concept_id, unit_concept_id, - range_low as count_value, - 1.0*(row_number() over (partition by observation_concept_id, unit_concept_id order by range_low))/(COUNT_BIG(*) over (partition by observation_concept_id, unit_concept_id)+1) as p1 -from @cdm_database_schema.observation o1 -where o1.unit_concept_id is not null - and o1.value_as_number is not null - and o1.range_low is not null - and o1.range_high is not null -) t1 -group by observation_concept_id, unit_concept_id -; ---} - - ---{817 IN (@list_of_analysis_ids)}?{ --- 817 Distribution of high range, by observation_concept_id and unit_concept_id -insert into @results_database_schema.ACHILLES_results_dist (analysis_id, stratum_1, stratum_2, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value) -select 817 as analysis_id, - observation_concept_id as stratum_1, - unit_concept_id as stratum_2, - COUNT_BIG(count_value) as count_value, - min(count_value) as min_value, - max(count_value) as max_value, - avg(1.0*count_value) as avg_value, - stdev(count_value) as stdev_value, - max(case when p1<=0.50 then count_value else -9999 end) as median_value, - max(case when p1<=0.10 then count_value else -9999 end) as p10_value, - max(case when p1<=0.25 then count_value else -9999 end) as p25_value, - max(case when p1<=0.75 then count_value else -9999 end) as p75_value, - max(case when p1<=0.90 then count_value else -9999 end) as p90_value -from -( -select observation_concept_id, unit_concept_id, - range_high as count_value, - 1.0*(row_number() over (partition by observation_concept_id, unit_concept_id order by range_high))/(COUNT_BIG(*) over (partition by observation_concept_id, 
unit_concept_id)+1) as p1 -from @cdm_database_schema.observation o1 -where o1.unit_concept_id is not null - and o1.value_as_number is not null - and o1.range_low is not null - and o1.range_high is not null -) t1 -group by observation_concept_id, unit_concept_id -; ---} - - - ---{818 IN (@list_of_analysis_ids)}?{ --- 818 Number of observation records below/within/above normal range, by observation_concept_id and unit_concept_id -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, stratum_2, stratum_3, count_value) -select 818 as analysis_id, - observation_concept_id as stratum_1, - unit_concept_id as stratum_2, - case when o1.value_as_number < o1.range_low then 'Below Range Low' - when o1.value_as_number >= o1.range_low and o1.value_as_number <= o1.range_high then 'Within Range' - when o1.value_as_number > o1.range_high then 'Above Range High' - else 'Other' end as stratum_3, - COUNT_BIG(o1.PERSON_ID) as count_value -from - @cdm_database_schema.observation o1 -where o1.value_as_number is not null - and o1.unit_concept_id is not null - and o1.range_low is not null - and o1.range_high is not null -group by observation_concept_id, - unit_concept_id, - case when o1.value_as_number < o1.range_low then 'Below Range Low' - when o1.value_as_number >= o1.range_low and o1.value_as_number <= o1.range_high then 'Within Range' - when o1.value_as_number > o1.range_high then 'Above Range High' - else 'Other' end -; ---} - - - ---{820 IN (@list_of_analysis_ids)}?{ --- 820 Number of observation records by observation month -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, count_value) -select 820 as analysis_id, - YEAR(observation_date)*100 + month(observation_date) as stratum_1, - COUNT_BIG(PERSON_ID) as count_value -from -@cdm_database_schema.observation o1 -group by YEAR(observation_date)*100 + month(observation_date) -; ---} - - - - -/******************************************** - -ACHILLES Analyses on DRUG_ERA table - -*********************************************/ - - ---{900 IN (@list_of_analysis_ids)}?{ --- 900 Number of persons with at least one drug occurrence, by drug_concept_id -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, count_value) -select 900 as analysis_id, - de1.drug_CONCEPT_ID as stratum_1, - COUNT_BIG(distinct de1.PERSON_ID) as count_value -from - @cdm_database_schema.drug_era de1 -group by de1.drug_CONCEPT_ID -; ---} - - ---{901 IN (@list_of_analysis_ids)}?{ --- 901 Number of drug occurrence records, by drug_concept_id -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, count_value) -select 901 as analysis_id, - de1.drug_CONCEPT_ID as stratum_1, - COUNT_BIG(de1.PERSON_ID) as count_value -from - @cdm_database_schema.drug_era de1 -group by de1.drug_CONCEPT_ID -; ---} - - - ---{902 IN (@list_of_analysis_ids)}?{ --- 902 Number of persons by drug occurrence start month, by drug_concept_id -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, stratum_2, count_value) -select 902 as analysis_id, - de1.drug_concept_id as stratum_1, - YEAR(drug_era_start_date)*100 + month(drug_era_start_date) as stratum_2, - COUNT_BIG(distinct PERSON_ID) as count_value -from -@cdm_database_schema.drug_era de1 -group by de1.drug_concept_id, - YEAR(drug_era_start_date)*100 + month(drug_era_start_date) -; ---} - - - ---{903 IN (@list_of_analysis_ids)}?{ --- 903 Number of distinct drug era concepts per person -insert into
@results_database_schema.ACHILLES_results_dist (analysis_id, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value) -select 903 as analysis_id, - COUNT_BIG(count_value) as count_value, - min(count_value) as min_value, - max(count_value) as max_value, - avg(1.0*count_value) as avg_value, - stdev(count_value) as stdev_value, - max(case when p1<=0.50 then count_value else -9999 end) as median_value, - max(case when p1<=0.10 then count_value else -9999 end) as p10_value, - max(case when p1<=0.25 then count_value else -9999 end) as p25_value, - max(case when p1<=0.75 then count_value else -9999 end) as p75_value, - max(case when p1<=0.90 then count_value else -9999 end) as p90_value -from -( -select num_drugs as count_value, - 1.0*(row_number() over (order by num_drugs))/(COUNT_BIG(*) over ()+1) as p1 -from - ( - select de1.person_id, COUNT_BIG(distinct de1.drug_concept_id) as num_drugs - from - @cdm_database_schema.drug_era de1 - group by de1.person_id - ) t0 -) t1 -; ---} - - - ---{904 IN (@list_of_analysis_ids)}?{ --- 904 Number of persons with at least one drug occurrence, by drug_concept_id by calendar year by gender by age decile -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, stratum_2, stratum_3, stratum_4, count_value) -select 904 as analysis_id, - de1.drug_concept_id as stratum_1, - YEAR(drug_era_start_date) as stratum_2, - p1.gender_concept_id as stratum_3, - floor((year(drug_era_start_date) - p1.year_of_birth)/10) as stratum_4, - COUNT_BIG(distinct p1.PERSON_ID) as count_value -from @cdm_database_schema.person p1 -inner join -@cdm_database_schema.drug_era de1 -on p1.person_id = de1.person_id -group by de1.drug_concept_id, - YEAR(drug_era_start_date), - p1.gender_concept_id, - floor((year(drug_era_start_date) - p1.year_of_birth)/10) -; ---} - - - - ---{906 IN (@list_of_analysis_ids)}?{ --- 906 Distribution of age by drug_concept_id -insert into @results_database_schema.ACHILLES_results_dist (analysis_id, stratum_1, stratum_2, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value) -select 906 as analysis_id, - drug_concept_id as stratum_1, - gender_concept_id as stratum_2, - COUNT_BIG(count_value) as count_value, - min(count_value) as min_value, - max(count_value) as max_value, - avg(1.0*count_value) as avg_value, - stdev(count_value) as stdev_value, - max(case when p1<=0.50 then count_value else -9999 end) as median_value, - max(case when p1<=0.10 then count_value else -9999 end) as p10_value, - max(case when p1<=0.25 then count_value else -9999 end) as p25_value, - max(case when p1<=0.75 then count_value else -9999 end) as p75_value, - max(case when p1<=0.90 then count_value else -9999 end) as p90_value -from -( -select de1.drug_concept_id, - p1.gender_concept_id, - de1.drug_start_year - p1.year_of_birth as count_value, - 1.0*(row_number() over (partition by de1.drug_concept_id, p1.gender_concept_id order by de1.drug_start_year - p1.year_of_birth))/(COUNT_BIG(*) over (partition by de1.drug_concept_id, p1.gender_concept_id)+1) as p1 -from @cdm_database_schema.person p1 -inner join -(select person_id, drug_concept_id, min(year(drug_era_start_date)) as drug_start_year -from @cdm_database_schema.drug_era -group by person_id, drug_concept_id -) de1 -on p1.person_id = de1.person_id -) t1 -group by drug_concept_id, gender_concept_id -; ---} - - - - - ---{907 IN (@list_of_analysis_ids)}?{ --- 907 Distribution of drug era length, by 
drug_concept_id -insert into @results_database_schema.ACHILLES_results_dist (analysis_id, stratum_1, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value) -select 907 as analysis_id, - drug_concept_id as stratum_1, - COUNT_BIG(count_value) as count_value, - min(count_value) as min_value, - max(count_value) as max_value, - avg(1.0*count_value) as avg_value, - stdev(count_value) as stdev_value, - max(case when p1<=0.50 then count_value else -9999 end) as median_value, - max(case when p1<=0.10 then count_value else -9999 end) as p10_value, - max(case when p1<=0.25 then count_value else -9999 end) as p25_value, - max(case when p1<=0.75 then count_value else -9999 end) as p75_value, - max(case when p1<=0.90 then count_value else -9999 end) as p90_value -from -( -select drug_concept_id, - datediff(dd,drug_era_start_date, drug_era_end_date) as count_value, - 1.0*(row_number() over (partition by drug_concept_id order by datediff(dd,drug_era_start_date, drug_era_end_date)))/(COUNT_BIG(*) over (partition by drug_concept_id)+1) as p1 -from @cdm_database_schema.drug_era de1 - -) t1 -group by drug_concept_id -; ---} - - - ---{908 IN (@list_of_analysis_ids)}?{ --- 908 Number of drug eras with invalid person -insert into @results_database_schema.ACHILLES_results (analysis_id, count_value) -select 908 as analysis_id, - COUNT_BIG(de1.PERSON_ID) as count_value -from - @cdm_database_schema.drug_era de1 - left join @cdm_database_schema.PERSON p1 - on p1.person_id = de1.person_id -where p1.person_id is null -; ---} - - ---{909 IN (@list_of_analysis_ids)}?{ --- 909 Number of drug eras outside valid observation period -insert into @results_database_schema.ACHILLES_results (analysis_id, count_value) -select 909 as analysis_id, - COUNT_BIG(de1.PERSON_ID) as count_value -from - @cdm_database_schema.drug_era de1 - left join @cdm_database_schema.observation_period op1 - on op1.person_id = de1.person_id - and de1.drug_era_start_date >= op1.observation_period_start_date - and de1.drug_era_start_date <= op1.observation_period_end_date -where op1.person_id is null -; ---} - - ---{910 IN (@list_of_analysis_ids)}?{ --- 910 Number of drug eras with end date < start date -insert into @results_database_schema.ACHILLES_results (analysis_id, count_value) -select 910 as analysis_id, - COUNT_BIG(de1.PERSON_ID) as count_value -from - @cdm_database_schema.drug_era de1 -where de1.drug_era_end_date < de1.drug_era_start_date -; ---} - - - ---{920 IN (@list_of_analysis_ids)}?{ --- 920 Number of drug era records by drug era start month -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, count_value) -select 920 as analysis_id, - YEAR(drug_era_start_date)*100 + month(drug_era_start_date) as stratum_1, - COUNT_BIG(PERSON_ID) as count_value -from -@cdm_database_schema.drug_era de1 -group by YEAR(drug_era_start_date)*100 + month(drug_era_start_date) -; ---} - - - - - -/******************************************** - -ACHILLES Analyses on CONDITION_ERA table - -*********************************************/ - - ---{1000 IN (@list_of_analysis_ids)}?{ --- 1000 Number of persons with at least one condition occurrence, by condition_concept_id -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, count_value) -select 1000 as analysis_id, - ce1.condition_CONCEPT_ID as stratum_1, - COUNT_BIG(distinct ce1.PERSON_ID) as count_value -from - @cdm_database_schema.condition_era ce1 -group by ce1.condition_CONCEPT_ID -; ---} - - ---{1001 IN 
(@list_of_analysis_ids)}?{ --- 1001 Number of condition occurrence records, by condition_concept_id -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, count_value) -select 1001 as analysis_id, - ce1.condition_CONCEPT_ID as stratum_1, - COUNT_BIG(ce1.PERSON_ID) as count_value -from - @cdm_database_schema.condition_era ce1 -group by ce1.condition_CONCEPT_ID -; ---} - - - ---{1002 IN (@list_of_analysis_ids)}?{ --- 1002 Number of persons by condition occurrence start month, by condition_concept_id -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, stratum_2, count_value) -select 1002 as analysis_id, - ce1.condition_concept_id as stratum_1, - YEAR(condition_era_start_date)*100 + month(condition_era_start_date) as stratum_2, - COUNT_BIG(distinct PERSON_ID) as count_value -from -@cdm_database_schema.condition_era ce1 -group by ce1.condition_concept_id, - YEAR(condition_era_start_date)*100 + month(condition_era_start_date) -; ---} - - - ---{1003 IN (@list_of_analysis_ids)}?{ --- 1003 Number of distinct condition era concepts per person -insert into @results_database_schema.ACHILLES_results_dist (analysis_id, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value) -select 1003 as analysis_id, - COUNT_BIG(count_value) as count_value, - min(count_value) as min_value, - max(count_value) as max_value, - avg(1.0*count_value) as avg_value, - stdev(count_value) as stdev_value, - max(case when p1<=0.50 then count_value else -9999 end) as median_value, - max(case when p1<=0.10 then count_value else -9999 end) as p10_value, - max(case when p1<=0.25 then count_value else -9999 end) as p25_value, - max(case when p1<=0.75 then count_value else -9999 end) as p75_value, - max(case when p1<=0.90 then count_value else -9999 end) as p90_value -from -( -select num_conditions as count_value, - 1.0*(row_number() over (order by num_conditions))/(COUNT_BIG(*) over ()+1) as p1 -from - ( - select ce1.person_id, COUNT_BIG(distinct ce1.condition_concept_id) as num_conditions - from - @cdm_database_schema.condition_era ce1 - group by ce1.person_id - ) t0 -) t1 -; ---} - - - ---{1004 IN (@list_of_analysis_ids)}?{ --- 1004 Number of persons with at least one condition occurrence, by condition_concept_id by calendar year by gender by age decile -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, stratum_2, stratum_3, stratum_4, count_value) -select 1004 as analysis_id, - ce1.condition_concept_id as stratum_1, - YEAR(condition_era_start_date) as stratum_2, - p1.gender_concept_id as stratum_3, - floor((year(condition_era_start_date) - p1.year_of_birth)/10) as stratum_4, - COUNT_BIG(distinct p1.PERSON_ID) as count_value -from @cdm_database_schema.person p1 -inner join -@cdm_database_schema.condition_era ce1 -on p1.person_id = ce1.person_id -group by ce1.condition_concept_id, - YEAR(condition_era_start_date), - p1.gender_concept_id, - floor((year(condition_era_start_date) - p1.year_of_birth)/10) -; ---} - - - - ---{1006 IN (@list_of_analysis_ids)}?{ --- 1006 Distribution of age by condition_concept_id -insert into @results_database_schema.ACHILLES_results_dist (analysis_id, stratum_1, stratum_2, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value) -select 1006 as analysis_id, - condition_concept_id as stratum_1, - gender_concept_id as stratum_2, - COUNT_BIG(count_value) as count_value, - min(count_value) as min_value, - 
max(count_value) as max_value, - avg(1.0*count_value) as avg_value, - stdev(count_value) as stdev_value, - max(case when p1<=0.50 then count_value else -9999 end) as median_value, - max(case when p1<=0.10 then count_value else -9999 end) as p10_value, - max(case when p1<=0.25 then count_value else -9999 end) as p25_value, - max(case when p1<=0.75 then count_value else -9999 end) as p75_value, - max(case when p1<=0.90 then count_value else -9999 end) as p90_value -from -( -select ce1.condition_concept_id, - p1.gender_concept_id, - ce1.condition_start_year - p1.year_of_birth as count_value, - 1.0*(row_number() over (partition by ce1.condition_concept_id, p1.gender_concept_id order by ce1.condition_start_year - p1.year_of_birth))/(COUNT_BIG(*) over (partition by ce1.condition_concept_id, p1.gender_concept_id)+1) as p1 -from @cdm_database_schema.person p1 -inner join -(select person_id, condition_concept_id, min(year(condition_era_start_date)) as condition_start_year -from @cdm_database_schema.condition_era -group by person_id, condition_concept_id -) ce1 -on p1.person_id = ce1.person_id -) t1 -group by condition_concept_id, gender_concept_id -; ---} - - - - - ---{1007 IN (@list_of_analysis_ids)}?{ --- 1007 Distribution of condition era length, by condition_concept_id -insert into @results_database_schema.ACHILLES_results_dist (analysis_id, stratum_1, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value) -select 1007 as analysis_id, - condition_concept_id as stratum_1, - COUNT_BIG(count_value) as count_value, - min(count_value) as min_value, - max(count_value) as max_value, - avg(1.0*count_value) as avg_value, - stdev(count_value) as stdev_value, - max(case when p1<=0.50 then count_value else -9999 end) as median_value, - max(case when p1<=0.10 then count_value else -9999 end) as p10_value, - max(case when p1<=0.25 then count_value else -9999 end) as p25_value, - max(case when p1<=0.75 then count_value else -9999 end) as p75_value, - max(case when p1<=0.90 then count_value else -9999 end) as p90_value -from -( -select condition_concept_id, - datediff(dd,condition_era_start_date, condition_era_end_date) as count_value, - 1.0*(row_number() over (partition by condition_concept_id order by datediff(dd,condition_era_start_date, condition_era_end_date)))/(COUNT_BIG(*) over (partition by condition_concept_id)+1) as p1 -from @cdm_database_schema.condition_era ce1 - -) t1 -group by condition_concept_id -; ---} - - - ---{1008 IN (@list_of_analysis_ids)}?{ --- 1008 Number of condition eras with invalid person -insert into @results_database_schema.ACHILLES_results (analysis_id, count_value) -select 1008 as analysis_id, - COUNT_BIG(ce1.PERSON_ID) as count_value -from - @cdm_database_schema.condition_era ce1 - left join @cdm_database_schema.PERSON p1 - on p1.person_id = ce1.person_id -where p1.person_id is null -; ---} - - ---{1009 IN (@list_of_analysis_ids)}?{ --- 1009 Number of condition eras outside valid observation period -insert into @results_database_schema.ACHILLES_results (analysis_id, count_value) -select 1009 as analysis_id, - COUNT_BIG(ce1.PERSON_ID) as count_value -from - @cdm_database_schema.condition_era ce1 - left join @cdm_database_schema.observation_period op1 - on op1.person_id = ce1.person_id - and ce1.condition_era_start_date >= op1.observation_period_start_date - and ce1.condition_era_start_date <= op1.observation_period_end_date -where op1.person_id is null -; ---} - - ---{1010 IN (@list_of_analysis_ids)}?{ --- 1010 Number 
of condition eras with end date < start date -insert into @results_database_schema.ACHILLES_results (analysis_id, count_value) -select 1010 as analysis_id, - COUNT_BIG(ce1.PERSON_ID) as count_value -from - @cdm_database_schema.condition_era ce1 -where ce1.condition_era_end_date < ce1.condition_era_start_date -; ---} - - ---{1020 IN (@list_of_analysis_ids)}?{ --- 1020 Number of drug era records by drug era start month -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, count_value) -select 1020 as analysis_id, - YEAR(condition_era_start_date)*100 + month(condition_era_start_date) as stratum_1, - COUNT_BIG(PERSON_ID) as count_value -from -@cdm_database_schema.condition_era ce1 -group by YEAR(condition_era_start_date)*100 + month(condition_era_start_date) -; ---} - - - - -/******************************************** - -ACHILLES Analyses on LOCATION table - -*********************************************/ - ---{1100 IN (@list_of_analysis_ids)}?{ --- 1100 Number of persons by location 3-digit zip -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, count_value) -select 1100 as analysis_id, - left(l1.zip,3) as stratum_1, COUNT_BIG(distinct person_id) as count_value -from @cdm_database_schema.PERSON p1 - inner join @cdm_database_schema.LOCATION l1 - on p1.location_id = l1.location_id -where p1.location_id is not null - and l1.zip is not null -group by left(l1.zip,3); ---} - - ---{1101 IN (@list_of_analysis_ids)}?{ --- 1101 Number of persons by location state -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, count_value) -select 1101 as analysis_id, - l1.state as stratum_1, COUNT_BIG(distinct person_id) as count_value -from @cdm_database_schema.PERSON p1 - inner join @cdm_database_schema.LOCATION l1 - on p1.location_id = l1.location_id -where p1.location_id is not null - and l1.state is not null -group by l1.state; ---} - - ---{1102 IN (@list_of_analysis_ids)}?{ --- 1102 Number of care sites by location 3-digit zip -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, count_value) -select 1102 as analysis_id, - left(l1.zip,3) as stratum_1, COUNT_BIG(distinct care_site_id) as count_value -from @cdm_database_schema.care_site cs1 - inner join @cdm_database_schema.LOCATION l1 - on cs1.location_id = l1.location_id -where cs1.location_id is not null - and l1.zip is not null -group by left(l1.zip,3); ---} - - ---{1103 IN (@list_of_analysis_ids)}?{ --- 1103 Number of care sites by location state -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, count_value) -select 1103 as analysis_id, - l1.state as stratum_1, COUNT_BIG(distinct care_site_id) as count_value -from @cdm_database_schema.care_site cs1 - inner join @cdm_database_schema.LOCATION l1 - on cs1.location_id = l1.location_id -where cs1.location_id is not null - and l1.state is not null -group by l1.state; ---} - - -/******************************************** - -ACHILLES Analyses on CARE_SITE table - -*********************************************/ - - ---{1200 IN (@list_of_analysis_ids)}?{ --- 1200 Number of persons by place of service -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, count_value) -select 1200 as analysis_id, - cs1.place_of_service_concept_id as stratum_1, COUNT_BIG(person_id) as count_value -from @cdm_database_schema.PERSON p1 - inner join @cdm_database_schema.care_site cs1 - on p1.care_site_id = cs1.care_site_id -where p1.care_site_id is not null - and 
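-- A minimal, self-contained sketch of the anti-join shape that the referential-integrity
-- checks in this script (1008, 1009, 1010 above, and later 1500/1501 and 1600/1601) all
-- share: left join the child table to its parent and count the rows whose parent key finds
-- no match. The demo.child / demo.parent names below are placeholders, not CDM tables.
select COUNT_BIG(c.child_id) as orphan_count
from demo.child c
	left join demo.parent p
	on c.parent_id = p.parent_id
where p.parent_id is null  -- child rows whose parent_id has no matching parent row
;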
cs1.place_of_service_concept_id is not null -group by cs1.place_of_service_concept_id; ---} - - ---{1201 IN (@list_of_analysis_ids)}?{ --- 1201 Number of visits by place of service -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, count_value) -select 1201 as analysis_id, - cs1.place_of_service_concept_id as stratum_1, COUNT_BIG(visit_occurrence_id) as count_value -from @cdm_database_schema.visit_occurrence vo1 - inner join @cdm_database_schema.care_site cs1 - on vo1.care_site_id = cs1.care_site_id -where vo1.care_site_id is not null - and cs1.place_of_service_concept_id is not null -group by cs1.place_of_service_concept_id; ---} - - ---{1202 IN (@list_of_analysis_ids)}?{ --- 1202 Number of care sites by place of service -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, count_value) -select 1202 as analysis_id, - cs1.place_of_service_concept_id as stratum_1, - COUNT_BIG(care_site_id) as count_value -from @cdm_database_schema.care_site cs1 -where cs1.place_of_service_concept_id is not null -group by cs1.place_of_service_concept_id; ---} - - -/******************************************** - -ACHILLES Analyses on ORGANIZATION table - -*********************************************/ - ---{1300 IN (@list_of_analysis_ids)}?{ --- 1300 Number of organizations by place of service -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, count_value) -select 1300 as analysis_id, - place_of_service_concept_id as stratum_1, - COUNT_BIG(organization_id) as count_value -from @cdm_database_schema.organization o1 -where place_of_service_concept_id is not null -group by place_of_service_concept_id; ---} - - - - - -/******************************************** - -ACHILLES Analyses on PAYOR_PLAN_PERIOD table - -*********************************************/ - - ---{1406 IN (@list_of_analysis_ids)}?{ --- 1406 Length of payer plan (days) of first payer plan period by gender -insert into @results_database_schema.ACHILLES_results_dist (analysis_id, stratum_1, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value) -select 1406 as analysis_id, - gender_concept_id as stratum_1, - COUNT_BIG(count_value) as count_value, - min(count_value) as min_value, - max(count_value) as max_value, - avg(1.0*count_value) as avg_value, - stdev(count_value) as stdev_value, - max(case when p1<=0.50 then count_value else -9999 end) as median_value, - max(case when p1<=0.10 then count_value else -9999 end) as p10_value, - max(case when p1<=0.25 then count_value else -9999 end) as p25_value, - max(case when p1<=0.75 then count_value else -9999 end) as p75_value, - max(case when p1<=0.90 then count_value else -9999 end) as p90_value -from -( -select p1.gender_concept_id, - DATEDIFF(dd,ppp1.payer_plan_period_start_date, ppp1.payer_plan_period_end_date) as count_value, - 1.0*(row_number() over (partition by p1.gender_concept_id order by DATEDIFF(dd,ppp1.payer_plan_period_start_date, ppp1.payer_plan_period_end_date)))/(COUNT_BIG(*) over (partition by p1.gender_concept_id)+1) as p1 -from @cdm_database_schema.PERSON p1 - inner join - (select person_id, - payer_plan_period_START_DATE, - payer_plan_period_END_DATE, - ROW_NUMBER() over (PARTITION by person_id order by payer_plan_period_start_date asc) as rn1 - from @cdm_database_schema.payer_plan_period - ) ppp1 - on p1.PERSON_ID = ppp1.PERSON_ID - where ppp1.rn1 = 1 -) t1 -group by gender_concept_id -; ---} - - - ---{1407 IN (@list_of_analysis_ids)}?{ --- 
1407 Length of payer plan (days) of first payer plan period by age decile -insert into @results_database_schema.ACHILLES_results_dist (analysis_id, stratum_1, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value) -select 1407 as analysis_id, - age_decile as stratum_1, - COUNT_BIG(count_value) as count_value, - min(count_value) as min_value, - max(count_value) as max_value, - avg(1.0*count_value) as avg_value, - stdev(count_value) as stdev_value, - max(case when p1<=0.50 then count_value else -9999 end) as median_value, - max(case when p1<=0.10 then count_value else -9999 end) as p10_value, - max(case when p1<=0.25 then count_value else -9999 end) as p25_value, - max(case when p1<=0.75 then count_value else -9999 end) as p75_value, - max(case when p1<=0.90 then count_value else -9999 end) as p90_value -from -( -select floor((year(ppp1.payer_plan_period_START_DATE) - p1.YEAR_OF_BIRTH)/140) as age_decile, - DATEDIFF(dd,ppp1.payer_plan_period_start_date, ppp1.payer_plan_period_end_date) as count_value, - 1.0*(row_number() over (partition by floor((year(ppp1.payer_plan_period_START_DATE) - p1.YEAR_OF_BIRTH)/140) order by DATEDIFF(dd,ppp1.payer_plan_period_start_date, ppp1.payer_plan_period_end_date)))/(COUNT_BIG(*) over (partition by floor((year(ppp1.payer_plan_period_START_DATE) - p1.YEAR_OF_BIRTH)/140))+1) as p1 -from @cdm_database_schema.PERSON p1 - inner join - (select person_id, - payer_plan_period_START_DATE, - payer_plan_period_END_DATE, - ROW_NUMBER() over (PARTITION by person_id order by payer_plan_period_start_date asc) as rn1 - from @cdm_database_schema.payer_plan_period - ) ppp1 - on p1.PERSON_ID = ppp1.PERSON_ID - where ppp1.rn1 = 1 -) t1 -group by age_decile -; ---} - - - - - - ---{1408 IN (@list_of_analysis_ids)}?{ --- 1408 Number of persons by length of payer plan period, in 30d increments -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, count_value) -select 1408 as analysis_id, - floor(DATEDIFF(dd, ppp1.payer_plan_period_start_date, ppp1.payer_plan_period_end_date)/30) as stratum_1, - COUNT_BIG(distinct p1.person_id) as count_value -from @cdm_database_schema.PERSON p1 - inner join - (select person_id, - payer_plan_period_START_DATE, - payer_plan_period_END_DATE, - ROW_NUMBER() over (PARTITION by person_id order by payer_plan_period_start_date asc) as rn1 - from @cdm_database_schema.payer_plan_period - ) ppp1 - on p1.PERSON_ID = ppp1.PERSON_ID - where ppp1.rn1 = 1 -group by floor(DATEDIFF(dd, ppp1.payer_plan_period_start_date, ppp1.payer_plan_period_end_date)/30) -; ---} - - ---{1409 IN (@list_of_analysis_ids)}?{ --- 1409 Number of persons with continuous payer plan in each year --- Note: using temp table instead of nested query because this gives vastly improved - -IF OBJECT_ID('tempdb..#temp_dates', 'U') IS NOT NULL - DROP TABLE #temp_dates; - -select distinct - YEAR(payer_plan_period_start_date) as obs_year -INTO - #temp_dates -from - @cdm_database_schema.payer_plan_period -; - -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, count_value) -select 1409 as analysis_id, - t1.obs_year as stratum_1, COUNT_BIG(distinct p1.PERSON_ID) as count_value -from - @cdm_database_schema.PERSON p1 - inner join - @cdm_database_schema.payer_plan_period ppp1 - on p1.person_id = ppp1.person_id - , - #temp_dates t1 -where year(ppp1.payer_plan_period_START_DATE) <= t1.obs_year - and year(ppp1.payer_plan_period_END_DATE) >= t1.obs_year -group by t1.obs_year -; - -truncate table 
#temp_dates; -drop table #temp_dates; ---} - - ---{1410 IN (@list_of_analysis_ids)}?{ --- 1410 Number of persons with continuous payer plan in each month --- Note: using temp table instead of nested query because this gives vastly improved performance in Oracle - -IF OBJECT_ID('tempdb..#temp_dates', 'U') IS NOT NULL - DROP TABLE #temp_dates; - -SELECT DISTINCT - YEAR(payer_plan_period_start_date)*100 + MONTH(payer_plan_period_start_date) AS obs_month, - DATEFROMPARTS(YEAR(payer_plan_period_start_date),MONTH(payer_plan_period_start_date),1) AS obs_month_start, - EOMONTH(payer_plan_period_start_date) AS obs_month_end -INTO - #temp_dates -FROM - @cdm_database_schema.payer_plan_period -; - -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, count_value) -select - 1410 as analysis_id, - obs_month as stratum_1, - COUNT_BIG(distinct p1.PERSON_ID) as count_value -from - @cdm_database_schema.PERSON p1 - inner join - @cdm_database_schema.payer_plan_period ppp1 - on p1.person_id = ppp1.person_id - , - #temp_dates -where ppp1.payer_plan_period_START_DATE <= obs_month_start - and ppp1.payer_plan_period_END_DATE >= obs_month_end -group by obs_month -; - -TRUNCATE TABLE #temp_dates; -DROP TABLE #temp_dates; ---} - - - ---{1411 IN (@list_of_analysis_ids)}?{ --- 1411 Number of persons by payer plan period start month -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, count_value) -select 1411 as analysis_id, - DATEFROMPARTS(YEAR(payer_plan_period_start_date), month(payer_plan_period_START_DATE),1) as stratum_1, - COUNT_BIG(distinct p1.PERSON_ID) as count_value -from - @cdm_database_schema.PERSON p1 - inner join @cdm_database_schema.payer_plan_period ppp1 - on p1.person_id = ppp1.person_id -group by DATEFROMPARTS(YEAR(payer_plan_period_start_date), month(payer_plan_period_START_DATE),1) -; ---} - - - ---{1412 IN (@list_of_analysis_ids)}?{ --- 1412 Number of persons by payer plan period end month -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, count_value) -select 1412 as analysis_id, - DATEFROMPARTS(YEAR(payer_plan_period_end_date), month(payer_plan_period_end_DATE), 1) as stratum_1, - COUNT_BIG(distinct p1.PERSON_ID) as count_value -from - @cdm_database_schema.PERSON p1 - inner join @cdm_database_schema.payer_plan_period ppp1 - on p1.person_id = ppp1.person_id -group by DATEFROMPARTS(YEAR(payer_plan_period_end_date), month(payer_plan_period_end_DATE), 1) -; ---} - - ---{1413 IN (@list_of_analysis_ids)}?{ --- 1413 Number of persons by number of payer plan periods -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, count_value) -select 1413 as analysis_id, - ppp1.num_periods as stratum_1, - COUNT_BIG(distinct p1.PERSON_ID) as count_value -from - @cdm_database_schema.PERSON p1 - inner join (select person_id, COUNT_BIG(payer_plan_period_start_date) as num_periods from @cdm_database_schema.payer_plan_period group by PERSON_ID) ppp1 - on p1.person_id = ppp1.person_id -group by ppp1.num_periods -; ---} - ---{1414 IN (@list_of_analysis_ids)}?{ --- 1414 Number of persons with payer plan period before year-of-birth -insert into @results_database_schema.ACHILLES_results (analysis_id, count_value) -select 1414 as analysis_id, - COUNT_BIG(distinct p1.PERSON_ID) as count_value -from - @cdm_database_schema.PERSON p1 - inner join (select person_id, MIN(year(payer_plan_period_start_date)) as first_obs_year from @cdm_database_schema.payer_plan_period group by PERSON_ID) ppp1 - on p1.person_id = ppp1.person_id 
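-- For comparison with the #temp_dates approach used by analyses 1409/1410 above (kept, per
-- the note, because the nested form performed poorly on Oracle), this is a rough sketch of
-- the equivalent nested-subquery version of 1409: same semantics, but the distinct-year
-- derived table is evaluated inline instead of being materialized first.
select 1409 as analysis_id,
	t1.obs_year as stratum_1,
	COUNT_BIG(distinct p1.PERSON_ID) as count_value
from
	@cdm_database_schema.PERSON p1
	inner join
	@cdm_database_schema.payer_plan_period ppp1
	on p1.person_id = ppp1.person_id
	,
	(select distinct YEAR(payer_plan_period_start_date) as obs_year
	 from @cdm_database_schema.payer_plan_period) t1
where year(ppp1.payer_plan_period_START_DATE) <= t1.obs_year
	and year(ppp1.payer_plan_period_END_DATE) >= t1.obs_year
group by t1.obs_year
;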
-where p1.year_of_birth > ppp1.first_obs_year -; ---} - ---{1415 IN (@list_of_analysis_ids)}?{ --- 1415 Number of persons with payer plan period end < start -insert into @results_database_schema.ACHILLES_results (analysis_id, count_value) -select 1415 as analysis_id, - COUNT_BIG(ppp1.PERSON_ID) as count_value -from - @cdm_database_schema.payer_plan_period ppp1 -where ppp1.payer_plan_period_end_date < ppp1.payer_plan_period_start_date -; ---} - - - - - - -/******************************************** - -ACHILLES Analyses on DRUG_COST table - -*********************************************/ - ---{1500 IN (@list_of_analysis_ids)}?{ --- 1500 Number of drug cost records with invalid drug exposure id -insert into @results_database_schema.ACHILLES_results (analysis_id, count_value) -select 1500 as analysis_id, - COUNT_BIG(dc1.drug_cost_ID) as count_value -from - @cdm_database_schema.drug_cost dc1 - left join @cdm_database_schema.drug_exposure de1 - on dc1.drug_exposure_id = de1.drug_exposure_id -where de1.drug_exposure_id is null -; ---} - ---{1501 IN (@list_of_analysis_ids)}?{ --- 1501 Number of drug cost records with invalid payer plan period id -insert into @results_database_schema.ACHILLES_results (analysis_id, count_value) -select 1501 as analysis_id, - COUNT_BIG(dc1.drug_cost_ID) as count_value -from - @cdm_database_schema.drug_cost dc1 - left join @cdm_database_schema.payer_plan_period ppp1 - on dc1.payer_plan_period_id = ppp1.payer_plan_period_id -where dc1.payer_plan_period_id is not null - and ppp1.payer_plan_period_id is null -; ---} - - ---{1502 IN (@list_of_analysis_ids)}?{ --- 1502 Distribution of paid copay, by drug_concept_id -insert into @results_database_schema.ACHILLES_results_dist (analysis_id, stratum_1, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value) -select 1502 as analysis_id, - drug_concept_id as stratum_1, - COUNT_BIG(count_value) as count_value, - min(count_value) as min_value, - max(count_value) as max_value, - avg(1.0*count_value) as avg_value, - stdev(count_value) as stdev_value, - max(case when p1<=0.50 then count_value else -9999 end) as median_value, - max(case when p1<=0.10 then count_value else -9999 end) as p10_value, - max(case when p1<=0.25 then count_value else -9999 end) as p25_value, - max(case when p1<=0.75 then count_value else -9999 end) as p75_value, - max(case when p1<=0.90 then count_value else -9999 end) as p90_value -from -( -select drug_concept_id, - paid_copay as count_value, - 1.0*(row_number() over (partition by drug_concept_id order by paid_copay))/(COUNT_BIG(*) over (partition by drug_concept_id)+1) as p1 -from @cdm_database_schema.drug_exposure de1 - inner join - @cdm_database_schema.drug_cost dc1 - on de1.drug_exposure_id = dc1.drug_exposure_id -where paid_copay is not null -) t1 -group by drug_concept_id -; ---} - - ---{1503 IN (@list_of_analysis_ids)}?{ --- 1503 Distribution of paid coinsurance, by drug_concept_id -insert into @results_database_schema.ACHILLES_results_dist (analysis_id, stratum_1, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value) -select 1503 as analysis_id, - drug_concept_id as stratum_1, - COUNT_BIG(count_value) as count_value, - min(count_value) as min_value, - max(count_value) as max_value, - avg(1.0*count_value) as avg_value, - stdev(count_value) as stdev_value, - max(case when p1<=0.50 then count_value else -9999 end) as median_value, - max(case when p1<=0.10 then count_value else 
-9999 end) as p10_value, - max(case when p1<=0.25 then count_value else -9999 end) as p25_value, - max(case when p1<=0.75 then count_value else -9999 end) as p75_value, - max(case when p1<=0.90 then count_value else -9999 end) as p90_value -from -( -select drug_concept_id, - paid_coinsurance as count_value, - 1.0*(row_number() over (partition by drug_concept_id order by paid_coinsurance))/(COUNT_BIG(*) over (partition by drug_concept_id)+1) as p1 -from @cdm_database_schema.drug_exposure de1 - inner join - @cdm_database_schema.drug_cost dc1 - on de1.drug_exposure_id = dc1.drug_exposure_id -where paid_coinsurance is not null -) t1 -group by drug_concept_id -; ---} - ---{1504 IN (@list_of_analysis_ids)}?{ --- 1504 Distribution of paid toward deductible, by drug_concept_id -insert into @results_database_schema.ACHILLES_results_dist (analysis_id, stratum_1, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value) -select 1504 as analysis_id, - drug_concept_id as stratum_1, - COUNT_BIG(count_value) as count_value, - min(count_value) as min_value, - max(count_value) as max_value, - avg(1.0*count_value) as avg_value, - stdev(count_value) as stdev_value, - max(case when p1<=0.50 then count_value else -9999 end) as median_value, - max(case when p1<=0.10 then count_value else -9999 end) as p10_value, - max(case when p1<=0.25 then count_value else -9999 end) as p25_value, - max(case when p1<=0.75 then count_value else -9999 end) as p75_value, - max(case when p1<=0.90 then count_value else -9999 end) as p90_value -from -( -select drug_concept_id, - paid_toward_deductible as count_value, - 1.0*(row_number() over (partition by drug_concept_id order by paid_toward_deductible))/(COUNT_BIG(*) over (partition by drug_concept_id)+1) as p1 -from @cdm_database_schema.drug_exposure de1 - inner join - @cdm_database_schema.drug_cost dc1 - on de1.drug_exposure_id = dc1.drug_exposure_id -where paid_toward_deductible is not null -) t1 -group by drug_concept_id -; ---} - ---{1505 IN (@list_of_analysis_ids)}?{ --- 1505 Distribution of paid by payer, by drug_concept_id -insert into @results_database_schema.ACHILLES_results_dist (analysis_id, stratum_1, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value) -select 1505 as analysis_id, - drug_concept_id as stratum_1, - COUNT_BIG(count_value) as count_value, - min(count_value) as min_value, - max(count_value) as max_value, - avg(1.0*count_value) as avg_value, - stdev(count_value) as stdev_value, - max(case when p1<=0.50 then count_value else -9999 end) as median_value, - max(case when p1<=0.10 then count_value else -9999 end) as p10_value, - max(case when p1<=0.25 then count_value else -9999 end) as p25_value, - max(case when p1<=0.75 then count_value else -9999 end) as p75_value, - max(case when p1<=0.90 then count_value else -9999 end) as p90_value -from -( -select drug_concept_id, - paid_by_payer as count_value, - 1.0*(row_number() over (partition by drug_concept_id order by paid_by_payer))/(COUNT_BIG(*) over (partition by drug_concept_id)+1) as p1 -from @cdm_database_schema.drug_exposure de1 - inner join - @cdm_database_schema.drug_cost dc1 - on de1.drug_exposure_id = dc1.drug_exposure_id -where paid_by_payer is not null -) t1 -group by drug_concept_id -; ---} - ---{1506 IN (@list_of_analysis_ids)}?{ --- 1506 Distribution of paid by coordination of benefit, by drug_concept_id -insert into @results_database_schema.ACHILLES_results_dist (analysis_id, 
stratum_1, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value) -select 1506 as analysis_id, - drug_concept_id as stratum_1, - COUNT_BIG(count_value) as count_value, - min(count_value) as min_value, - max(count_value) as max_value, - avg(1.0*count_value) as avg_value, - stdev(count_value) as stdev_value, - max(case when p1<=0.50 then count_value else -9999 end) as median_value, - max(case when p1<=0.10 then count_value else -9999 end) as p10_value, - max(case when p1<=0.25 then count_value else -9999 end) as p25_value, - max(case when p1<=0.75 then count_value else -9999 end) as p75_value, - max(case when p1<=0.90 then count_value else -9999 end) as p90_value -from -( -select drug_concept_id, - paid_by_coordination_benefits as count_value, - 1.0*(row_number() over (partition by drug_concept_id order by paid_by_coordination_benefits))/(COUNT_BIG(*) over (partition by drug_concept_id)+1) as p1 -from @cdm_database_schema.drug_exposure de1 - inner join - @cdm_database_schema.drug_cost dc1 - on de1.drug_exposure_id = dc1.drug_exposure_id -where paid_by_coordination_benefits is not null -) t1 -group by drug_concept_id -; ---} - ---{1507 IN (@list_of_analysis_ids)}?{ --- 1507 Distribution of total out-of-pocket, by drug_concept_id -insert into @results_database_schema.ACHILLES_results_dist (analysis_id, stratum_1, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value) -select 1507 as analysis_id, - drug_concept_id as stratum_1, - COUNT_BIG(count_value) as count_value, - min(count_value) as min_value, - max(count_value) as max_value, - avg(1.0*count_value) as avg_value, - stdev(count_value) as stdev_value, - max(case when p1<=0.50 then count_value else -9999 end) as median_value, - max(case when p1<=0.10 then count_value else -9999 end) as p10_value, - max(case when p1<=0.25 then count_value else -9999 end) as p25_value, - max(case when p1<=0.75 then count_value else -9999 end) as p75_value, - max(case when p1<=0.90 then count_value else -9999 end) as p90_value -from -( -select drug_concept_id, - total_out_of_pocket as count_value, - 1.0*(row_number() over (partition by drug_concept_id order by total_out_of_pocket))/(COUNT_BIG(*) over (partition by drug_concept_id)+1) as p1 -from @cdm_database_schema.drug_exposure de1 - inner join - @cdm_database_schema.drug_cost dc1 - on de1.drug_exposure_id = dc1.drug_exposure_id -where total_out_of_pocket is not null -) t1 -group by drug_concept_id -; ---} - - ---{1508 IN (@list_of_analysis_ids)}?{ --- 1508 Distribution of total paid, by drug_concept_id -insert into @results_database_schema.ACHILLES_results_dist (analysis_id, stratum_1, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value) -select 1508 as analysis_id, - drug_concept_id as stratum_1, - COUNT_BIG(count_value) as count_value, - min(count_value) as min_value, - max(count_value) as max_value, - avg(1.0*count_value) as avg_value, - stdev(count_value) as stdev_value, - max(case when p1<=0.50 then count_value else -9999 end) as median_value, - max(case when p1<=0.10 then count_value else -9999 end) as p10_value, - max(case when p1<=0.25 then count_value else -9999 end) as p25_value, - max(case when p1<=0.75 then count_value else -9999 end) as p75_value, - max(case when p1<=0.90 then count_value else -9999 end) as p90_value -from -( -select drug_concept_id, - total_paid as count_value, - 1.0*(row_number() over 
(partition by drug_concept_id order by total_paid))/(COUNT_BIG(*) over (partition by drug_concept_id)+1) as p1 -from @cdm_database_schema.drug_exposure de1 - inner join - @cdm_database_schema.drug_cost dc1 - on de1.drug_exposure_id = dc1.drug_exposure_id -where total_paid is not null -) t1 -group by drug_concept_id -; ---} - - ---{1509 IN (@list_of_analysis_ids)}?{ --- 1509 Distribution of ingredient_cost, by drug_concept_id -insert into @results_database_schema.ACHILLES_results_dist (analysis_id, stratum_1, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value) -select 1509 as analysis_id, - drug_concept_id as stratum_1, - COUNT_BIG(count_value) as count_value, - min(count_value) as min_value, - max(count_value) as max_value, - avg(1.0*count_value) as avg_value, - stdev(count_value) as stdev_value, - max(case when p1<=0.50 then count_value else -9999 end) as median_value, - max(case when p1<=0.10 then count_value else -9999 end) as p10_value, - max(case when p1<=0.25 then count_value else -9999 end) as p25_value, - max(case when p1<=0.75 then count_value else -9999 end) as p75_value, - max(case when p1<=0.90 then count_value else -9999 end) as p90_value -from -( -select drug_concept_id, - ingredient_cost as count_value, - 1.0*(row_number() over (partition by drug_concept_id order by ingredient_cost))/(COUNT_BIG(*) over (partition by drug_concept_id)+1) as p1 -from @cdm_database_schema.drug_exposure de1 - inner join - @cdm_database_schema.drug_cost dc1 - on de1.drug_exposure_id = dc1.drug_exposure_id -where ingredient_cost is not null -) t1 -group by drug_concept_id -; ---} - ---{1510 IN (@list_of_analysis_ids)}?{ --- 1510 Distribution of dispensing fee, by drug_concept_id -insert into @results_database_schema.ACHILLES_results_dist (analysis_id, stratum_1, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value) -select 1510 as analysis_id, - drug_concept_id as stratum_1, - COUNT_BIG(count_value) as count_value, - min(count_value) as min_value, - max(count_value) as max_value, - avg(1.0*count_value) as avg_value, - stdev(count_value) as stdev_value, - max(case when p1<=0.50 then count_value else -9999 end) as median_value, - max(case when p1<=0.10 then count_value else -9999 end) as p10_value, - max(case when p1<=0.25 then count_value else -9999 end) as p25_value, - max(case when p1<=0.75 then count_value else -9999 end) as p75_value, - max(case when p1<=0.90 then count_value else -9999 end) as p90_value -from -( -select drug_concept_id, - dispensing_fee as count_value, - 1.0*(row_number() over (partition by drug_concept_id order by dispensing_fee))/(COUNT_BIG(*) over (partition by drug_concept_id)+1) as p1 -from @cdm_database_schema.drug_exposure de1 - inner join - @cdm_database_schema.drug_cost dc1 - on de1.drug_exposure_id = dc1.drug_exposure_id -where dispensing_fee is not null -) t1 -group by drug_concept_id -; ---} - ---{1511 IN (@list_of_analysis_ids)}?{ --- 1511 Distribution of average wholesale price, by drug_concept_id -insert into @results_database_schema.ACHILLES_results_dist (analysis_id, stratum_1, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value) -select 1511 as analysis_id, - drug_concept_id as stratum_1, - COUNT_BIG(count_value) as count_value, - min(count_value) as min_value, - max(count_value) as max_value, - avg(1.0*count_value) as avg_value, - stdev(count_value) as stdev_value, - 
max(case when p1<=0.50 then count_value else -9999 end) as median_value, - max(case when p1<=0.10 then count_value else -9999 end) as p10_value, - max(case when p1<=0.25 then count_value else -9999 end) as p25_value, - max(case when p1<=0.75 then count_value else -9999 end) as p75_value, - max(case when p1<=0.90 then count_value else -9999 end) as p90_value -from -( -select drug_concept_id, - average_wholesale_price as count_value, - 1.0*(row_number() over (partition by drug_concept_id order by average_wholesale_price))/(COUNT_BIG(*) over (partition by drug_concept_id)+1) as p1 -from @cdm_database_schema.drug_exposure de1 - inner join - @cdm_database_schema.drug_cost dc1 - on de1.drug_exposure_id = dc1.drug_exposure_id -where average_wholesale_price is not null -) t1 -group by drug_concept_id -; ---} - -/******************************************** - -ACHILLES Analyses on PROCEDURE_COST table - -*********************************************/ - - ---{1600 IN (@list_of_analysis_ids)}?{ --- 1600 Number of procedure cost records with invalid procedure exposure id -insert into @results_database_schema.ACHILLES_results (analysis_id, count_value) -select 1600 as analysis_id, - COUNT_BIG(pc1.procedure_cost_ID) as count_value -from - @cdm_database_schema.procedure_cost pc1 - left join @cdm_database_schema.procedure_occurrence po1 - on pc1.procedure_occurrence_id = po1.procedure_occurrence_id -where po1.procedure_occurrence_id is null -; ---} - ---{1601 IN (@list_of_analysis_ids)}?{ --- 1601 Number of procedure cost records with invalid payer plan period id -insert into @results_database_schema.ACHILLES_results (analysis_id, count_value) -select 1601 as analysis_id, - COUNT_BIG(pc1.procedure_cost_ID) as count_value -from - @cdm_database_schema.procedure_cost pc1 - left join @cdm_database_schema.payer_plan_period ppp1 - on pc1.payer_plan_period_id = ppp1.payer_plan_period_id -where pc1.payer_plan_period_id is not null - and ppp1.payer_plan_period_id is null -; ---} - - ---{1602 IN (@list_of_analysis_ids)}?{ --- 1602 Distribution of paid copay, by procedure_concept_id -insert into @results_database_schema.ACHILLES_results_dist (analysis_id, stratum_1, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value) -select 1602 as analysis_id, - procedure_concept_id as stratum_1, - COUNT_BIG(count_value) as count_value, - min(count_value) as min_value, - max(count_value) as max_value, - avg(1.0*count_value) as avg_value, - stdev(count_value) as stdev_value, - max(case when p1<=0.50 then count_value else -9999 end) as median_value, - max(case when p1<=0.10 then count_value else -9999 end) as p10_value, - max(case when p1<=0.25 then count_value else -9999 end) as p25_value, - max(case when p1<=0.75 then count_value else -9999 end) as p75_value, - max(case when p1<=0.90 then count_value else -9999 end) as p90_value -from -( -select procedure_concept_id, - paid_copay as count_value, - 1.0*(row_number() over (partition by procedure_concept_id order by paid_copay))/(COUNT_BIG(*) over (partition by procedure_concept_id)+1) as p1 -from @cdm_database_schema.procedure_occurrence po1 - inner join - @cdm_database_schema.procedure_cost pc1 - on po1.procedure_occurrence_id = pc1.procedure_occurrence_id -where paid_copay is not null -) t1 -group by procedure_concept_id -; ---} - - ---{1603 IN (@list_of_analysis_ids)}?{ --- 1603 Distribution of paid coinsurance, by procedure_concept_id -insert into @results_database_schema.ACHILLES_results_dist (analysis_id, 
stratum_1, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value) -select 1603 as analysis_id, - procedure_concept_id as stratum_1, - COUNT_BIG(count_value) as count_value, - min(count_value) as min_value, - max(count_value) as max_value, - avg(1.0*count_value) as avg_value, - stdev(count_value) as stdev_value, - max(case when p1<=0.50 then count_value else -9999 end) as median_value, - max(case when p1<=0.10 then count_value else -9999 end) as p10_value, - max(case when p1<=0.25 then count_value else -9999 end) as p25_value, - max(case when p1<=0.75 then count_value else -9999 end) as p75_value, - max(case when p1<=0.90 then count_value else -9999 end) as p90_value -from -( -select procedure_concept_id, - paid_coinsurance as count_value, - 1.0*(row_number() over (partition by procedure_concept_id order by paid_coinsurance))/(COUNT_BIG(*) over (partition by procedure_concept_id)+1) as p1 -from @cdm_database_schema.procedure_occurrence po1 - inner join - @cdm_database_schema.procedure_cost pc1 - on po1.procedure_occurrence_id = pc1.procedure_occurrence_id -where paid_coinsurance is not null -) t1 -group by procedure_concept_id -; ---} - ---{1604 IN (@list_of_analysis_ids)}?{ --- 1604 Distribution of paid toward deductible, by procedure_concept_id -insert into @results_database_schema.ACHILLES_results_dist (analysis_id, stratum_1, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value) -select 1604 as analysis_id, - procedure_concept_id as stratum_1, - COUNT_BIG(count_value) as count_value, - min(count_value) as min_value, - max(count_value) as max_value, - avg(1.0*count_value) as avg_value, - stdev(count_value) as stdev_value, - max(case when p1<=0.50 then count_value else -9999 end) as median_value, - max(case when p1<=0.10 then count_value else -9999 end) as p10_value, - max(case when p1<=0.25 then count_value else -9999 end) as p25_value, - max(case when p1<=0.75 then count_value else -9999 end) as p75_value, - max(case when p1<=0.90 then count_value else -9999 end) as p90_value -from -( -select procedure_concept_id, - paid_toward_deductible as count_value, - 1.0*(row_number() over (partition by procedure_concept_id order by paid_toward_deductible))/(COUNT_BIG(*) over (partition by procedure_concept_id)+1) as p1 -from @cdm_database_schema.procedure_occurrence po1 - inner join - @cdm_database_schema.procedure_cost pc1 - on po1.procedure_occurrence_id = pc1.procedure_occurrence_id -where paid_toward_deductible is not null -) t1 -group by procedure_concept_id -; ---} - ---{1605 IN (@list_of_analysis_ids)}?{ --- 1605 Distribution of paid by payer, by procedure_concept_id -insert into @results_database_schema.ACHILLES_results_dist (analysis_id, stratum_1, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value) -select 1605 as analysis_id, - procedure_concept_id as stratum_1, - COUNT_BIG(count_value) as count_value, - min(count_value) as min_value, - max(count_value) as max_value, - avg(1.0*count_value) as avg_value, - stdev(count_value) as stdev_value, - max(case when p1<=0.50 then count_value else -9999 end) as median_value, - max(case when p1<=0.10 then count_value else -9999 end) as p10_value, - max(case when p1<=0.25 then count_value else -9999 end) as p25_value, - max(case when p1<=0.75 then count_value else -9999 end) as p75_value, - max(case when p1<=0.90 then count_value else -9999 end) as p90_value 
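-- The ACHILLES_results_dist analyses above and below (1406/1407, 1502-1511, 1602-1608, ...)
-- all derive their percentiles the same way: p1 is a row's rank within its stratum divided
-- by the stratum row count plus one, so max(case when p1 <= q then count_value else -9999 end)
-- picks the largest value at or below the q-th percentile; the -9999 sentinel only surfaces
-- when no row in a stratum reaches that cut-off, which can happen for the low percentiles in
-- very small strata. A minimal sketch of the pattern on a placeholder table
-- demo.measurements(category_id, val):
select category_id,
	COUNT_BIG(val) as count_value,
	max(case when p1<=0.50 then val else -9999 end) as median_value,
	max(case when p1<=0.90 then val else -9999 end) as p90_value
from
(
select category_id,
	val,
	1.0*(row_number() over (partition by category_id order by val))/(COUNT_BIG(*) over (partition by category_id)+1) as p1
from demo.measurements
) t1
group by category_id
;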
-from -( -select procedure_concept_id, - paid_by_payer as count_value, - 1.0*(row_number() over (partition by procedure_concept_id order by paid_by_payer))/(COUNT_BIG(*) over (partition by procedure_concept_id)+1) as p1 -from @cdm_database_schema.procedure_occurrence po1 - inner join - @cdm_database_schema.procedure_cost pc1 - on po1.procedure_occurrence_id = pc1.procedure_occurrence_id -where paid_by_payer is not null -) t1 -group by procedure_concept_id -; ---} - ---{1606 IN (@list_of_analysis_ids)}?{ --- 1606 Distribution of paid by coordination of benefit, by procedure_concept_id -insert into @results_database_schema.ACHILLES_results_dist (analysis_id, stratum_1, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value) -select 1606 as analysis_id, - procedure_concept_id as stratum_1, - COUNT_BIG(count_value) as count_value, - min(count_value) as min_value, - max(count_value) as max_value, - avg(1.0*count_value) as avg_value, - stdev(count_value) as stdev_value, - max(case when p1<=0.50 then count_value else -9999 end) as median_value, - max(case when p1<=0.10 then count_value else -9999 end) as p10_value, - max(case when p1<=0.25 then count_value else -9999 end) as p25_value, - max(case when p1<=0.75 then count_value else -9999 end) as p75_value, - max(case when p1<=0.90 then count_value else -9999 end) as p90_value -from -( -select procedure_concept_id, - paid_by_coordination_benefits as count_value, - 1.0*(row_number() over (partition by procedure_concept_id order by paid_by_coordination_benefits))/(COUNT_BIG(*) over (partition by procedure_concept_id)+1) as p1 -from @cdm_database_schema.procedure_occurrence po1 - inner join - @cdm_database_schema.procedure_cost pc1 - on po1.procedure_occurrence_id = pc1.procedure_occurrence_id -where paid_by_coordination_benefits is not null -) t1 -group by procedure_concept_id -; ---} - ---{1607 IN (@list_of_analysis_ids)}?{ --- 1607 Distribution of total out-of-pocket, by procedure_concept_id -insert into @results_database_schema.ACHILLES_results_dist (analysis_id, stratum_1, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value) -select 1607 as analysis_id, - procedure_concept_id as stratum_1, - COUNT_BIG(count_value) as count_value, - min(count_value) as min_value, - max(count_value) as max_value, - avg(1.0*count_value) as avg_value, - stdev(count_value) as stdev_value, - max(case when p1<=0.50 then count_value else -9999 end) as median_value, - max(case when p1<=0.10 then count_value else -9999 end) as p10_value, - max(case when p1<=0.25 then count_value else -9999 end) as p25_value, - max(case when p1<=0.75 then count_value else -9999 end) as p75_value, - max(case when p1<=0.90 then count_value else -9999 end) as p90_value -from -( -select procedure_concept_id, - total_out_of_pocket as count_value, - 1.0*(row_number() over (partition by procedure_concept_id order by total_out_of_pocket))/(COUNT_BIG(*) over (partition by procedure_concept_id)+1) as p1 -from @cdm_database_schema.procedure_occurrence po1 - inner join - @cdm_database_schema.procedure_cost pc1 - on po1.procedure_occurrence_id = pc1.procedure_occurrence_id -where total_out_of_pocket is not null -) t1 -group by procedure_concept_id -; ---} - - ---{1608 IN (@list_of_analysis_ids)}?{ --- 1608 Distribution of total paid, by procedure_concept_id -insert into @results_database_schema.ACHILLES_results_dist (analysis_id, stratum_1, count_value, min_value, max_value, 
avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value) -select 1608 as analysis_id, - procedure_concept_id as stratum_1, - COUNT_BIG(count_value) as count_value, - min(count_value) as min_value, - max(count_value) as max_value, - avg(1.0*count_value) as avg_value, - stdev(count_value) as stdev_value, - max(case when p1<=0.50 then count_value else -9999 end) as median_value, - max(case when p1<=0.10 then count_value else -9999 end) as p10_value, - max(case when p1<=0.25 then count_value else -9999 end) as p25_value, - max(case when p1<=0.75 then count_value else -9999 end) as p75_value, - max(case when p1<=0.90 then count_value else -9999 end) as p90_value -from -( -select procedure_concept_id, - total_paid as count_value, - 1.0*(row_number() over (partition by procedure_concept_id order by total_paid))/(COUNT_BIG(*) over (partition by procedure_concept_id)+1) as p1 -from @cdm_database_schema.procedure_occurrence po1 - inner join - @cdm_database_schema.procedure_cost pc1 - on po1.procedure_occurrence_id = pc1.procedure_occurrence_id -where total_paid is not null -) t1 -group by procedure_concept_id -; ---} - - ---{1609 IN (@list_of_analysis_ids)}?{ --- 1609 Number of records by disease_class_concept_id -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, count_value) -select 1609 as analysis_id, - disease_class_concept_id as stratum_1, - COUNT_BIG(pc1.procedure_cost_ID) as count_value -from - @cdm_database_schema.procedure_cost pc1 -where disease_class_concept_id is not null -group by disease_class_concept_id -; ---} - - ---{1610 IN (@list_of_analysis_ids)}?{ --- 1610 Number of records by revenue_code_concept_id -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, count_value) -select 1610 as analysis_id, - revenue_code_concept_id as stratum_1, - COUNT_BIG(pc1.procedure_cost_ID) as count_value -from - @cdm_database_schema.procedure_cost pc1 -where revenue_code_concept_id is not null -group by revenue_code_concept_id -; ---} - - - -/******************************************** - -ACHILLES Analyses on COHORT table - -*********************************************/ - ---{1700 IN (@list_of_analysis_ids)}?{ --- 1700 Number of records by cohort_concept_id -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, count_value) -select 1700 as analysis_id, - cohort_concept_id as stratum_1, - COUNT_BIG(subject_ID) as count_value -from - @cdm_database_schema.cohort c1 -group by cohort_concept_id -; ---} - - ---{1701 IN (@list_of_analysis_ids)}?{ --- 1701 Number of records with cohort end date < cohort start date -insert into @results_database_schema.ACHILLES_results (analysis_id, count_value) -select 1701 as analysis_id, - COUNT_BIG(subject_ID) as count_value -from - @cdm_database_schema.cohort c1 -where c1.cohort_end_date < c1.cohort_start_date -; ---} - - -delete from @results_database_schema.ACHILLES_results where count_value <= @smallcellcount; -delete from @results_database_schema.ACHILLES_results_dist where count_value <= @smallcellcount; - - diff --git a/inst/sql/sql_server/Achilles_v5.sql b/inst/sql/sql_server/Achilles_v5.sql deleted file mode 100644 index 2969fc9d..00000000 --- a/inst/sql/sql_server/Achilles_v5.sql +++ /dev/null @@ -1,7733 +0,0 @@ -/****************************************************************** - -# @file ACHILLES_v5.SQL -# -# Copyright 2014 Observational Health Data Sciences and Informatics -# -# This file is part of ACHILLES -# -# Licensed under the Apache License, 
Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# @author Observational Health Data Sciences and Informatics - - - - -*******************************************************************/ - - -/******************************************************************* - -Achilles - database profiling summary statistics generation - -SQL for OMOP CDM v5 - - -*******************************************************************/ - -{DEFAULT @cdm_database = 'CDM'} -{DEFAULT @results_database = 'scratch'} -{DEFAULT @results_database_schema = 'scratch.dbo'} -{DEFAULT @source_name = 'CDM NAME'} -{DEFAULT @achilles_version = '1.4.6'} -{DEFAULT @smallcellcount = 5} -{DEFAULT @createTable = TRUE} -{DEFAULT @validateSchema = FALSE} - - /**** - developer comment about general ACHILLES calculation process: - you could drive # of persons by age decile, from # of persons by age decile by gender - as a general rule: do full stratification once, and then aggregate across strata to avoid re-calculation - works for all prevalence calculations...does not work for any distribution statistics - *****/ - ---{@validateSchema}?{ - --- RSD - 2014-10-27 --- Execute a series of quick select statements to verify that the CDM schema --- has all the proper tables and columns --- The point is to catch any missing tables/columns here before we spend hours --- generating results before bombing out - -create table #TableCheck -( - tablename varchar(50) -) -; - -insert into #TableCheck (tablename) -select 'care_site' -from ( -SELECT - care_site_id, - location_id, - place_of_service_concept_id, - care_site_source_value, - place_of_service_source_value, - row_number() over (order by care_site_id) rn -FROM - @cdm_database_schema.care_site -) CARE_SITE -WHERE rn = 1; - - -insert into #TableCheck (tablename) -select 'cohort' -from ( -SELECT - cohort_definition_id, - cohort_start_date, - cohort_end_date, - subject_id, - row_number() over (order by cohort_definition_id) rn -FROM - @cdm_database_schema.cohort -) COHORT -WHERE rn = 1; - -insert into #TableCheck (tablename) -select 'condition_era' -from ( -SELECT - condition_era_id, - person_id, - condition_concept_id, - condition_era_start_date, - condition_era_end_date, - condition_occurrence_count, - row_number() over (order by person_id) rn -FROM - @cdm_database_schema.condition_era -) CONDITION_ERA -WHERE rn = 1; - -insert into #TableCheck (tablename) -select 'condition_occurrence' -from ( -SELECT - condition_occurrence_id, - person_id, - condition_concept_id, - condition_start_date, - condition_end_date, - condition_type_concept_id, - provider_id, - visit_occurrence_id, - condition_source_value, - condition_source_concept_id, - row_number() over (order by person_id) rn -FROM - @cdm_database_schema.condition_occurrence -) condition_occurrence -WHERE rn = 1; - -insert into #TableCheck (tablename) -select 'death' -from ( -SELECT - person_id, - death_date, - death_type_concept_id, - cause_concept_id, - cause_source_value, - cause_source_concept_id, - row_number() over (order by person_id) rn -FROM - @cdm_database_schema.death -) 
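-- The @-prefixed tokens (@cdm_database_schema, @results_database_schema, @smallcellcount, ...),
-- the {DEFAULT @x = 'y'} lines and the --{condition}?{ ... --} wrappers in this script are
-- SqlRender parameterization rather than native T-SQL: the R layer fills in the parameters and
-- keeps or drops each conditional block before translating the SQL Server dialect to the target
-- platform (via SqlRender's render/translate functions, named renderSql/translateSql in older
-- releases). As a rough illustration, the cohort check 1701 shown earlier would render to plain
-- SQL along these lines, assuming @results_database_schema resolves to its documented default
-- 'scratch.dbo', @cdm_database_schema to a hypothetical 'cdm.dbo', and 1701 is present in
-- @list_of_analysis_ids so its wrapper is kept:
insert into scratch.dbo.ACHILLES_results (analysis_id, count_value)
select 1701 as analysis_id,
	COUNT_BIG(subject_ID) as count_value
from
	cdm.dbo.cohort c1
where c1.cohort_end_date < c1.cohort_start_date
;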
death -WHERE rn = 1; - -insert into #TableCheck (tablename) -select 'drug_cost' -from ( -SELECT - drug_cost_id, - drug_exposure_id, - paid_copay, - paid_coinsurance, - paid_toward_deductible, - paid_by_payer, - paid_by_coordination_benefits, - total_out_of_pocket, - total_paid, - ingredient_cost, - dispensing_fee, - average_wholesale_price, - payer_plan_period_id, - row_number() over (order by drug_cost_id) rn -FROM - @cdm_database_schema.drug_cost -) drug_cost -WHERE rn = 1; - -insert into #TableCheck (tablename) -select 'device_exposure' -from ( -SELECT - device_exposure_id, - person_id, - device_concept_id, - device_exposure_start_date, - device_exposure_end_date, - device_type_concept_id, - unique_device_id, - quantity, - provider_id, - visit_occurrence_id, - device_source_value, - device_source_concept_id, - row_number() over (order by person_id) rn -FROM - @cdm_database_schema.device_exposure -) device_exposure -WHERE rn = 1; - -insert into #TableCheck (tablename) -select 'dose_era' -from ( -SELECT - dose_era_id, - person_id, - drug_concept_id, - unit_concept_id, - dose_value, - dose_era_start_date, - dose_era_end_date, - row_number() over (order by person_id) rn -FROM - @cdm_database_schema.dose_era -) dose_era -WHERE rn = 1; - -insert into #TableCheck (tablename) -select 'drug_cost' -from ( -SELECT - drug_cost_id, - drug_exposure_id, - currency_concept_id, - paid_copay, - paid_coinsurance, - paid_toward_deductible, - paid_by_payer, - paid_by_coordination_benefits, - total_out_of_pocket, - total_paid, - ingredient_cost, - dispensing_fee, - average_wholesale_price, - payer_plan_period_id, - row_number() over (order by drug_cost_id) rn -FROM - @cdm_database_schema.drug_cost -) drug_cost -WHERE rn = 1; - -insert into #TableCheck (tablename) -select 'drug_era' -from ( -SELECT - drug_era_id, - person_id, - drug_concept_id, - drug_era_start_date, - drug_era_end_date, - drug_exposure_count, - row_number() over (order by person_id) rn -FROM - @cdm_database_schema.drug_era -) drug_era -WHERE rn = 1; - -insert into #TableCheck (tablename) -select 'drug_exposure' -from ( -SELECT - drug_exposure_id, - person_id, - drug_concept_id, - drug_exposure_start_date, - drug_exposure_end_date, - drug_type_concept_id, - stop_reason, - refills, - quantity, - days_supply, - sig, - route_concept_id, - effective_drug_dose, - dose_unit_concept_id, - lot_number, - provider_id, - visit_occurrence_id, - drug_source_value, - drug_source_concept_id, - route_source_value, - dose_unit_source_value, - row_number() over (order by person_id) rn -FROM - @cdm_database_schema.drug_exposure -) drug_exposure -WHERE rn = 1; - -insert into #TableCheck (tablename) -select 'location' -from ( -SELECT - location_id, - address_1, - address_2, - city, - STATE, - zip, - county, - location_source_value, - row_number() over (order by location_id) rn -FROM - @cdm_database_schema.location -) location -WHERE rn = 1; - -insert into #TableCheck (tablename) -select 'observation' -from ( -SELECT - observation_id, - person_id, - observation_concept_id, - observation_date, - observation_time, - value_as_number, - value_as_string, - value_as_concept_id, - qualifier_concept_id, - unit_concept_id, - observation_type_concept_id, - provider_id, - visit_occurrence_id, - observation_source_value, - observation_source_concept_id, - unit_source_value, - qualifier_source_value, - row_number() over (order by person_id) rn -FROM - @cdm_database_schema.observation -) location -WHERE rn = 1; - -insert into #TableCheck (tablename) -select 'observation_period' 
-from ( -SELECT - observation_period_id, - person_id, - observation_period_start_date, - observation_period_end_date, - row_number() over (order by person_id) rn -FROM - @cdm_database_schema.observation_period -) observation_period -WHERE rn = 1; - - -insert into #TableCheck (tablename) -select 'payer_plan_period' -from ( -SELECT - payer_plan_period_id, - person_id, - payer_plan_period_start_date, - payer_plan_period_end_date, - payer_source_value, - plan_source_value, - family_source_value, - row_number() over (order by person_id) rn -FROM - @cdm_database_schema.payer_plan_period -) payer_plan_period -WHERE rn = 1; - -insert into #TableCheck (tablename) -select 'person' -from ( -SELECT - person_id, - gender_concept_id, - year_of_birth, - month_of_birth, - day_of_birth, - race_concept_id, - ethnicity_concept_id, - location_id, - provider_id, - care_site_id, - person_source_value, - gender_source_value, - race_source_value, - ethnicity_source_value, - row_number() over (order by person_id) rn -FROM - @cdm_database_schema.person -) person -WHERE rn = 1; - -insert into #TableCheck (tablename) -select 'procedure_cost' -from ( -SELECT - procedure_cost_id, - procedure_occurrence_id, - currency_concept_id, - paid_copay, - paid_coinsurance, - paid_toward_deductible, - paid_by_payer, - paid_by_coordination_benefits, - total_out_of_pocket, - total_paid, - revenue_code_concept_id, - payer_plan_period_id, - revenue_code_source_value, - row_number() over (order by procedure_cost_id) rn -FROM - @cdm_database_schema.procedure_cost -) procedure_cost -WHERE rn = 1; - -insert into #TableCheck (tablename) -select 'procedure_occurrence' -from ( -SELECT - procedure_occurrence_id, - person_id, - procedure_concept_id, - procedure_date, - procedure_type_concept_id, - modifier_concept_id, - quantity, - provider_id, - visit_occurrence_id, - procedure_source_value, - procedure_source_concept_id, - qualifier_source_value, - row_number() over (order by person_id) rn -FROM - @cdm_database_schema.procedure_occurrence -) procedure_occurrence -WHERE rn = 1; - -insert into #TableCheck (tablename) -select 'provider' -from ( -SELECT - provider_id, - NPI, - DEA, - specialty_concept_id, - care_site_id, - provider_source_value, - specialty_source_value, - row_number() over (order by provider_id) rn -FROM - @cdm_database_schema.provider -) provider -WHERE rn = 1; - -insert into #TableCheck (tablename) -select 'visit_occurrence' -from ( -SELECT - visit_occurrence_id, - person_id, - visit_start_date, - visit_end_date, - visit_type_concept_id, - provider_id, - care_site_id, - visit_source_value, - visit_source_concept_id, - row_number() over (order by person_id) rn -FROM - @cdm_database_schema.visit_occurrence -) visit_occurrence -WHERE rn = 1; - -TRUNCATE TABLE #TableCheck; -DROP TABLE #TableCheck; - ---} - - ---{@createTable}?{ - -IF OBJECT_ID('@results_database_schema.ACHILLES_analysis', 'U') IS NOT NULL - drop table @results_database_schema.ACHILLES_analysis; - -create table @results_database_schema.ACHILLES_analysis -( - analysis_id int, - analysis_name varchar(255), - stratum_1_name varchar(255), - stratum_2_name varchar(255), - stratum_3_name varchar(255), - stratum_4_name varchar(255), - stratum_5_name varchar(255) -); - - -IF OBJECT_ID('@results_database_schema.ACHILLES_results', 'U') IS NOT NULL - drop table @results_database_schema.ACHILLES_results; - -create table @results_database_schema.ACHILLES_results -( - analysis_id int, - stratum_1 varchar(255), - stratum_2 varchar(255), - stratum_3 varchar(255), - stratum_4 
varchar(255), - stratum_5 varchar(255), - count_value bigint -); - - -IF OBJECT_ID('@results_database_schema.ACHILLES_results_dist', 'U') IS NOT NULL - drop table @results_database_schema.ACHILLES_results_dist; - -create table @results_database_schema.ACHILLES_results_dist -( - analysis_id int, - stratum_1 varchar(255), - stratum_2 varchar(255), - stratum_3 varchar(255), - stratum_4 varchar(255), - stratum_5 varchar(255), - count_value bigint, - min_value float, - max_value float, - avg_value float, - stdev_value float, - median_value float, - p10_value float, - p25_value float, - p75_value float, - p90_value float -); - - - ---end of creating tables - - ---populate the tables with names of analyses - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (0, 'Source name'); - ---000. PERSON statistics - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (1, 'Number of persons'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (2, 'Number of persons by gender', 'gender_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (3, 'Number of persons by year of birth', 'year_of_birth'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (4, 'Number of persons by race', 'race_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (5, 'Number of persons by ethnicity', 'ethnicity_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (7, 'Number of persons with invalid provider_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (8, 'Number of persons with invalid location_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (9, 'Number of persons with invalid care_site_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name, stratum_2_name) - values (10, 'Number of all persons by year of birth by gender', 'year_of_birth', 'gender_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name, stratum_2_name) - values (11, 'Number of non-deceased persons by year of birth by gender', 'year_of_birth', 'gender_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name, stratum_2_name) - values (12, 'Number of persons by race and ethnicity','race_concept_id','ethnicity_concept_id'); - - ---100. 
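-- Together, the three tables created above form the results model: ACHILLES_analysis is the
-- catalogue whose rows (populated here) give each analysis_id its name and the meaning of its
-- stratum columns, while ACHILLES_results and ACHILLES_results_dist hold the counts and
-- distribution statistics keyed by the same analysis_id and stratum values. A minimal sketch
-- of how a consumer resolves a result row back to its description (here analysis 2, number of
-- persons by gender):
select r.analysis_id,
	a.analysis_name,
	a.stratum_1_name,
	r.stratum_1,
	r.count_value
from @results_database_schema.ACHILLES_results r
	inner join @results_database_schema.ACHILLES_analysis a
	on r.analysis_id = a.analysis_id
where r.analysis_id = 2
;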
OBSERVATION_PERIOD (joined to PERSON) - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (101, 'Number of persons by age, with age at first observation period', 'age'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name, stratum_2_name) - values (102, 'Number of persons by gender by age, with age at first observation period', 'gender_concept_id', 'age'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (103, 'Distribution of age at first observation period'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (104, 'Distribution of age at first observation period by gender', 'gender_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (105, 'Length of observation (days) of first observation period'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (106, 'Length of observation (days) of first observation period by gender', 'gender_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (107, 'Length of observation (days) of first observation period by age decile', 'age decile'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (108, 'Number of persons by length of observation period, in 30d increments', 'Observation period length 30d increments'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (109, 'Number of persons with continuous observation in each year', 'calendar year'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (110, 'Number of persons with continuous observation in each month', 'calendar month'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (111, 'Number of persons by observation period start month', 'calendar month'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (112, 'Number of persons by observation period end month', 'calendar month'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (113, 'Number of persons by number of observation periods', 'number of observation periods'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (114, 'Number of persons with observation period before year-of-birth'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (115, 'Number of persons with observation period end < observation period start'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name, stratum_2_name, stratum_3_name) - values (116, 'Number of persons with at least one day of observation in each year by gender and age decile', 'calendar year', 'gender_concept_id', 'age decile'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (117, 'Number of persons with at least one day of observation in each month', 'calendar month'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, 
analysis_name) - values (118, 'Number of observation periods with invalid person_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (119, 'Number of observation period records by period_type_concept_id','period_type_concept_id'); - - - - ---200- VISIT_OCCURRENCE - - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (200, 'Number of persons with at least one visit occurrence, by visit_concept_id', 'visit_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (201, 'Number of visit occurrence records, by visit_concept_id', 'visit_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name, stratum_2_name) - values (202, 'Number of persons by visit occurrence start month, by visit_concept_id', 'visit_concept_id', 'calendar month'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (203, 'Number of distinct visit occurrence concepts per person'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name, stratum_2_name, stratum_3_name, stratum_4_name) - values (204, 'Number of persons with at least one visit occurrence, by visit_concept_id by calendar year by gender by age decile', 'visit_concept_id', 'calendar year', 'gender_concept_id', 'age decile'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name, stratum_2_name) - values (206, 'Distribution of age by visit_concept_id', 'visit_concept_id', 'gender_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (207, 'Number of visit records with invalid person_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (208, 'Number of visit records outside valid observation period'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (209, 'Number of visit records with end date < start date'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (210, 'Number of visit records with invalid care_site_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (211, 'Distribution of length of stay by visit_concept_id', 'visit_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name, stratum_2_name, stratum_3_name ) - values (212, 'Number of persons with at least one visit occurrence, by calendar year by gender by age decile', 'calendar year', 'gender_concept_id', 'age decile'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (220, 'Number of visit occurrence records by visit occurrence start month', 'calendar month'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (221, 'Number of persons by visit start year', 'calendar year'); - - - ---300- PROVIDER -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (300, 'Number of providers'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (301, 'Number of providers by specialty concept_id', 
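-- For each domain the lookup pairs a 'persons with at least one record' analysis with a
-- 'number of records' analysis (200/201 above, and the analogous 400/401, 600/601, 700/701,
-- 800/801 pairs in the sections that follow). A condensed sketch of that recurring pair for
-- VISIT_OCCURRENCE; the full queries later in this script follow this general shape, wrapped
-- in their conditional blocks and inserted into ACHILLES_results.
select 200 as analysis_id,
       cast(visit_concept_id as varchar(255)) as stratum_1,
       count_big(distinct person_id) as count_value
from @cdm_database_schema.visit_occurrence
group by visit_concept_id;

select 201 as analysis_id,
       cast(visit_concept_id as varchar(255)) as stratum_1,
       count_big(*) as count_value
from @cdm_database_schema.visit_occurrence
group by visit_concept_id;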
'specialty_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (302, 'Number of providers with invalid care site id'); - - - ---400- CONDITION_OCCURRENCE - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (400, 'Number of persons with at least one condition occurrence, by condition_concept_id', 'condition_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (401, 'Number of condition occurrence records, by condition_concept_id', 'condition_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name, stratum_2_name) - values (402, 'Number of persons by condition occurrence start month, by condition_concept_id', 'condition_concept_id', 'calendar month'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (403, 'Number of distinct condition occurrence concepts per person'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name, stratum_2_name, stratum_3_name, stratum_4_name) - values (404, 'Number of persons with at least one condition occurrence, by condition_concept_id by calendar year by gender by age decile', 'condition_concept_id', 'calendar year', 'gender_concept_id', 'age decile'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name, stratum_2_name) - values (405, 'Number of condition occurrence records, by condition_concept_id by condition_type_concept_id', 'condition_concept_id', 'condition_type_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name, stratum_2_name) - values (406, 'Distribution of age by condition_concept_id', 'condition_concept_id', 'gender_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (409, 'Number of condition occurrence records with invalid person_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (410, 'Number of condition occurrence records outside valid observation period'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (411, 'Number of condition occurrence records with end date < start date'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (412, 'Number of condition occurrence records with invalid provider_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (413, 'Number of condition occurrence records with invalid visit_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (420, 'Number of condition occurrence records by condition occurrence start month', 'calendar month'); - ---500- DEATH - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (500, 'Number of persons with death, by cause_concept_id', 'cause_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (501, 'Number of records of death, by cause_concept_id', 'cause_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (502, 'Number of 
persons by death month', 'calendar month'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name, stratum_2_name, stratum_3_name) - values (504, 'Number of persons with a death, by calendar year by gender by age decile', 'calendar year', 'gender_concept_id', 'age decile'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (505, 'Number of death records, by death_type_concept_id', 'death_type_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (506, 'Distribution of age at death by gender', 'gender_concept_id'); - - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (509, 'Number of death records with invalid person_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (510, 'Number of death records outside valid observation period'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (511, 'Distribution of time from death to last condition'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (512, 'Distribution of time from death to last drug'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (513, 'Distribution of time from death to last visit'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (514, 'Distribution of time from death to last procedure'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (515, 'Distribution of time from death to last observation'); - - ---600- PROCEDURE_OCCURRENCE - - - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (600, 'Number of persons with at least one procedure occurrence, by procedure_concept_id', 'procedure_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (601, 'Number of procedure occurrence records, by procedure_concept_id', 'procedure_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name, stratum_2_name) - values (602, 'Number of persons by procedure occurrence start month, by procedure_concept_id', 'procedure_concept_id', 'calendar month'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (603, 'Number of distinct procedure occurrence concepts per person'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name, stratum_2_name, stratum_3_name, stratum_4_name) - values (604, 'Number of persons with at least one procedure occurrence, by procedure_concept_id by calendar year by gender by age decile', 'procedure_concept_id', 'calendar year', 'gender_concept_id', 'age decile'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name, stratum_2_name) - values (605, 'Number of procedure occurrence records, by procedure_concept_id by procedure_type_concept_id', 'procedure_concept_id', 'procedure_type_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name, stratum_2_name) - values (606, 'Distribution of age by procedure_concept_id', 'procedure_concept_id', 
'gender_concept_id'); - - - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (609, 'Number of procedure occurrence records with invalid person_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (610, 'Number of procedure occurrence records outside valid observation period'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (612, 'Number of procedure occurrence records with invalid provider_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (613, 'Number of procedure occurrence records with invalid visit_id'); - - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (620, 'Number of procedure occurrence records by procedure occurrence start month', 'calendar month'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name, stratum_2_name) - values (691, 'Number of persons that have at least x procedures', 'procedure_id', 'procedure_count'); - ---700- DRUG_EXPOSURE - - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (700, 'Number of persons with at least one drug exposure, by drug_concept_id', 'drug_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (701, 'Number of drug exposure records, by drug_concept_id', 'drug_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name, stratum_2_name) - values (702, 'Number of persons by drug exposure start month, by drug_concept_id', 'drug_concept_id', 'calendar month'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (703, 'Number of distinct drug exposure concepts per person'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name, stratum_2_name, stratum_3_name, stratum_4_name) - values (704, 'Number of persons with at least one drug exposure, by drug_concept_id by calendar year by gender by age decile', 'drug_concept_id', 'calendar year', 'gender_concept_id', 'age decile'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name, stratum_2_name) - values (705, 'Number of drug exposure records, by drug_concept_id by drug_type_concept_id', 'drug_concept_id', 'drug_type_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name, stratum_2_name) - values (706, 'Distribution of age by drug_concept_id', 'drug_concept_id', 'gender_concept_id'); - - - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (709, 'Number of drug exposure records with invalid person_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (710, 'Number of drug exposure records outside valid observation period'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (711, 'Number of drug exposure records with end date < start date'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (712, 'Number of drug exposure records with invalid provider_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, 
analysis_name) - values (713, 'Number of drug exposure records with invalid visit_id'); - - - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (715, 'Distribution of days_supply by drug_concept_id', 'drug_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (716, 'Distribution of refills by drug_concept_id', 'drug_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (717, 'Distribution of quantity by drug_concept_id', 'drug_concept_id'); - - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (720, 'Number of drug exposure records by drug exposure start month', 'calendar month'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name, stratum_2_name) - values (791, 'Number of persons that have at least x drug exposures', 'drug_concept_id', 'drug_count'); - ---800- OBSERVATION - - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (800, 'Number of persons with at least one observation occurrence, by observation_concept_id', 'observation_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (801, 'Number of observation occurrence records, by observation_concept_id', 'observation_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name, stratum_2_name) - values (802, 'Number of persons by observation occurrence start month, by observation_concept_id', 'observation_concept_id', 'calendar month'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (803, 'Number of distinct observation occurrence concepts per person'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name, stratum_2_name, stratum_3_name, stratum_4_name) - values (804, 'Number of persons with at least one observation occurrence, by observation_concept_id by calendar year by gender by age decile', 'observation_concept_id', 'calendar year', 'gender_concept_id', 'age decile'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name, stratum_2_name) - values (805, 'Number of observation occurrence records, by observation_concept_id by observation_type_concept_id', 'observation_concept_id', 'observation_type_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name, stratum_2_name) - values (806, 'Distribution of age by observation_concept_id', 'observation_concept_id', 'gender_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name, stratum_2_name) - values (807, 'Number of observation occurrence records, by observation_concept_id and unit_concept_id', 'observation_concept_id', 'unit_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (809, 'Number of observation records with invalid person_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (810, 'Number of observation records outside valid observation period'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, 
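-- Analyses 691, 791 and 891 above (and 1891 below) report how many persons have at least x
-- records of a given kind. One way to derive such a summary, shown here only as an
-- illustrative sketch and not necessarily the exact query used later in this script, is to
-- count records per person and then count persons at or above each observed threshold.
with perPerson (person_id, record_count) as
(
  select person_id, count_big(*) as record_count
  from @cdm_database_schema.drug_exposure
  group by person_id
)
select t.record_count          as at_least_x,
       count_big(p.person_id)  as persons_with_at_least_x
from (select distinct record_count from perPerson) t
join perPerson p
  on p.record_count >= t.record_count
group by t.record_count;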
analysis_name) - values (812, 'Number of observation records with invalid provider_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (813, 'Number of observation records with invalid visit_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (814, 'Number of observation records with no value (numeric, string, or concept)'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (815, 'Distribution of numeric values, by observation_concept_id and unit_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (820, 'Number of observation records by observation start month', 'calendar month'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name, stratum_2_name) - values (891, 'Number of persons that have at least x observations', 'observation_concept_id', 'observation_count'); - ---900- DRUG_ERA - - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (900, 'Number of persons with at least one drug era, by drug_concept_id', 'drug_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (901, 'Number of drug era records, by drug_concept_id', 'drug_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name, stratum_2_name) - values (902, 'Number of persons by drug era start month, by drug_concept_id', 'drug_concept_id', 'calendar month'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (903, 'Number of distinct drug era concepts per person'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name, stratum_2_name, stratum_3_name, stratum_4_name) - values (904, 'Number of persons with at least one drug era, by drug_concept_id by calendar year by gender by age decile', 'drug_concept_id', 'calendar year', 'gender_concept_id', 'age decile'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name, stratum_2_name) - values (906, 'Distribution of age by drug_concept_id', 'drug_concept_id', 'gender_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (907, 'Distribution of drug era length, by drug_concept_id', 'drug_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (908, 'Number of drug eras without valid person'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (909, 'Number of drug eras outside valid observation period'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (910, 'Number of drug eras with end date < start date'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (920, 'Number of drug era records by drug era start month', 'calendar month'); - ---1000- CONDITION_ERA - - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (1000, 'Number of persons with at least one condition era, by condition_concept_id', 'condition_concept_id'); - -insert into 
@results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (1001, 'Number of condition era records, by condition_concept_id', 'condition_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name, stratum_2_name) - values (1002, 'Number of persons by condition era start month, by condition_concept_id', 'condition_concept_id', 'calendar month'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (1003, 'Number of distinct condition era concepts per person'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name, stratum_2_name, stratum_3_name, stratum_4_name) - values (1004, 'Number of persons with at least one condition era, by condition_concept_id by calendar year by gender by age decile', 'condition_concept_id', 'calendar year', 'gender_concept_id', 'age decile'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name, stratum_2_name) - values (1006, 'Distribution of age by condition_concept_id', 'condition_concept_id', 'gender_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (1007, 'Distribution of condition era length, by condition_concept_id', 'condition_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (1008, 'Number of condition eras without valid person'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (1009, 'Number of condition eras outside valid observation period'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (1010, 'Number of condition eras with end date < start date'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (1020, 'Number of condition era records by condition era start month', 'calendar month'); - - - ---1100- LOCATION - - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (1100, 'Number of persons by location 3-digit zip', '3-digit zip'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (1101, 'Number of persons by location state', 'state'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (1102, 'Number of care sites by location 3-digit zip', '3-digit zip'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (1103, 'Number of care sites by location state', 'state'); - - ---1200- CARE_SITE - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (1200, 'Number of persons by place of service', 'place_of_service_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (1201, 'Number of visits by place of service', 'place_of_service_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (1202, 'Number of care sites by place of service', 'place_of_service_concept_id'); - - ---1300- ORGANIZATION - ---NOT APPLICABLE IN CDMV5 ---insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, 
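-- Analyses 1100 to 1103 above stratify persons and care sites by 3-digit zip and by state.
-- A minimal sketch of the zip variant, assuming the 3-digit stratum is simply the leading
-- three characters of LOCATION.zip (the production query later in the script may differ
-- in details):
select 1100 as analysis_id,
       cast(left(l.zip, 3) as varchar(255)) as stratum_1,
       count_big(distinct p.person_id) as count_value
from @cdm_database_schema.person p
join @cdm_database_schema.location l
  on p.location_id = l.location_id
group by left(l.zip, 3);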
stratum_1_name) --- values (1300, 'Number of organizations by place of service', 'place_of_service_concept_id'); - - ---1400- PAYOR_PLAN_PERIOD - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (1406, 'Length of payer plan (days) of first payer plan period by gender', 'gender_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (1407, 'Length of payer plan (days) of first payer plan period by age decile', 'age_decile'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (1408, 'Number of persons by length of payer plan period, in 30d increments', 'payer plan period length 30d increments'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (1409, 'Number of persons with continuous payer plan in each year', 'calendar year'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (1410, 'Number of persons with continuous payer plan in each month', 'calendar month'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (1411, 'Number of persons by payer plan period start month', 'calendar month'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (1412, 'Number of persons by payer plan period end month', 'calendar month'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (1413, 'Number of persons by number of payer plan periods', 'number of payer plan periods'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (1414, 'Number of persons with payer plan period before year-of-birth'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (1415, 'Number of persons with payer plan period end < payer plan period start'); - ---1500- DRUG_COST - - - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (1500, 'Number of drug cost records with invalid drug exposure id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (1501, 'Number of drug cost records with invalid payer plan period id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (1502, 'Distribution of paid copay, by drug_concept_id', 'drug_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (1503, 'Distribution of paid coinsurance, by drug_concept_id', 'drug_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (1504, 'Distribution of paid toward deductible, by drug_concept_id', 'drug_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (1505, 'Distribution of paid by payer, by drug_concept_id', 'drug_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (1506, 'Distribution of paid by coordination of benefit, by drug_concept_id', 'drug_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, 
analysis_name, stratum_1_name) - values (1507, 'Distribution of total out-of-pocket, by drug_concept_id', 'drug_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (1508, 'Distribution of total paid, by drug_concept_id', 'drug_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (1509, 'Distribution of ingredient_cost, by drug_concept_id', 'drug_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (1510, 'Distribution of dispensing fee, by drug_concept_id', 'drug_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (1511, 'Distribution of average wholesale price, by drug_concept_id', 'drug_concept_id'); - - ---1600- PROCEDURE_COST - - - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (1600, 'Number of procedure cost records with invalid procedure occurrence id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (1601, 'Number of procedure cost records with invalid payer plan period id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (1602, 'Distribution of paid copay, by procedure_concept_id', 'procedure_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (1603, 'Distribution of paid coinsurance, by procedure_concept_id', 'procedure_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (1604, 'Distribution of paid toward deductible, by procedure_concept_id', 'procedure_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (1605, 'Distribution of paid by payer, by procedure_concept_id', 'procedure_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (1606, 'Distribution of paid by coordination of benefit, by procedure_concept_id', 'procedure_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (1607, 'Distribution of total out-of-pocket, by procedure_concept_id', 'procedure_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (1608, 'Distribution of total paid, by procedure_concept_id', 'procedure_concept_id'); - ---NOT APPLICABLE FOR OMOP CDM v5 ---insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) --- values (1609, 'Number of records by disease_class_concept_id', 'disease_class_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (1610, 'Number of records by revenue_code_concept_id', 'revenue_code_concept_id'); - - ---1700- COHORT - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (1700, 'Number of records by cohort_concept_id', 'cohort_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (1701, 'Number of records with cohort end date < cohort start date'); - ---1800- MEASUREMENT - 
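-- Analyses 814 (OBSERVATION, defined above) and 1814 (MEASUREMENT, defined just below) count
-- records that carry no usable value. A minimal sketch for the observation case, assuming a
-- plain NULL test on the three value columns (the production query may additionally treat
-- concept_id 0 as missing):
select 814 as analysis_id,
       count_big(*) as count_value
from @cdm_database_schema.observation
where value_as_number is null
  and value_as_string is null
  and value_as_concept_id is null;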
- -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (1800, 'Number of persons with at least one measurement occurrence, by measurement_concept_id', 'measurement_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (1801, 'Number of measurement occurrence records, by measurement_concept_id', 'measurement_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name, stratum_2_name) - values (1802, 'Number of persons by measurement occurrence start month, by measurement_concept_id', 'measurement_concept_id', 'calendar month'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (1803, 'Number of distinct mesurement occurrence concepts per person'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name, stratum_2_name, stratum_3_name, stratum_4_name) - values (1804, 'Number of persons with at least one mesurement occurrence, by measurement_concept_id by calendar year by gender by age decile', 'measurement_concept_id', 'calendar year', 'gender_concept_id', 'age decile'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name, stratum_2_name) - values (1805, 'Number of measurement occurrence records, by measurement_concept_id by measurement_type_concept_id', 'measurement_concept_id', 'measurement_type_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name, stratum_2_name) - values (1806, 'Distribution of age by measurement_concept_id', 'measurement_concept_id', 'gender_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name, stratum_2_name) - values (1807, 'Number of measurement occurrence records, by measurement_concept_id and unit_concept_id', 'measurement_concept_id', 'unit_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (1809, 'Number of measurement records with invalid person_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (1810, 'Number of measurement records outside valid observation period'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (1812, 'Number of measurement records with invalid provider_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (1813, 'Number of measurement records with invalid visit_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (1814, 'Number of measurement records with no value (numeric, string, or concept)'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (1815, 'Distribution of numeric values, by measurement_concept_id and unit_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (1816, 'Distribution of low range, by measurement_concept_id and unit_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (1817, 'Distribution of high range, by observation_concept_id and unit_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (1818, 'Number of 
measurement records below/within/above normal range, by measurement_concept_id and unit_concept_id'); - - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (1820, 'Number of measurement records by measurement start month', 'calendar month'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (1821, 'Number of measurement records with no numeric value'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name, stratum_2_name) - values (1891, 'Number of persons that have at least x measurements', 'measurement_concept_id', 'measurement_count'); - ---1900 REPORTS - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name, stratum_2_name) - values (1900, 'Source values mapped to concept_id 0 by table, by source_value', 'table_name', 'source_value'); - - ---2000 Iris (and possibly other new measures) integrated into Achilles - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (2000, 'Number of patients with at least 1 Dx and 1 Rx'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (2001, 'Number of patients with at least 1 Dx and 1 Proc'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (2002, 'Number of patients with at least 1 Meas, 1 Dx and 1 Rx'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name) - values (2003, 'Number of patients with at least 1 Visit'); - - ---2100- DEVICE_EXPOSURE - - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (2100, 'Number of persons with at least one device exposure, by device_concept_id', 'device_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (2101, 'Number of device exposure records, by device_concept_id', 'device_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name, stratum_2_name) - values (2102, 'Number of persons by device records start month, by device_concept_id', 'device_concept_id', 'calendar month'); - ---2103 was not implemented at this point - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name, stratum_2_name, stratum_3_name, stratum_4_name) - values (2104, 'Number of persons with at least one device exposure, by device_concept_id by calendar year by gender by age decile', 'device_concept_id', 'calendar year', 'gender_concept_id', 'age decile'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name, stratum_2_name) - values (2105, 'Number of device exposure records, by device_concept_id by device_type_concept_id', 'device_concept_id', 'device_type_concept_id'); - - - ---2200- NOTE - - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (2200, 'Number of persons with at least one note by note_type_concept_id', 'note_type_concept_id'); - -insert into @results_database_schema.ACHILLES_analysis (analysis_id, analysis_name, stratum_1_name) - values (2201, 'Number of note records, by note_type_concept_id', 'note_type_concept_id'); - - - - ---end of importing values into analysis lookup table - ---} : {else if not createTable -delete from 
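-- The @tokens and the "--{ condition }?{ ... --}" blocks in this script are SqlRender
-- constructs: parameters such as @results_database_schema and @list_of_analysis_ids are
-- substituted at render time, and a block is kept only when its condition evaluates to true,
-- so only the requested analyses are executed. The "--} : {else ...}" marker above separates
-- the create-table branch from this re-run branch, which first deletes any previous rows for
-- the selected analyses. Roughly, with example parameter values
-- results_database_schema = 'results', cdm_database_schema = 'cdm' and
-- list_of_analysis_ids = '1,2', the analysis 2 block below renders to something like:
insert into results.ACHILLES_results (analysis_id, stratum_1, count_value)
select 2 as analysis_id,
       cast(gender_concept_id as varchar(255)) as stratum_1,
       count_big(distinct person_id) as count_value
from cdm.PERSON
group by gender_concept_id;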
@results_database_schema.ACHILLES_results where analysis_id IN (@list_of_analysis_ids); -delete from @results_database_schema.ACHILLES_results_dist where analysis_id IN (@list_of_analysis_ids); ---} - -/**** -7. generate results for analysis_results - - -****/ - ---{0 IN (@list_of_analysis_ids)}?{ --- 0 cdm name, version of Achilles and date when pre-computations were executed -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, stratum_2, stratum_3,count_value) -select 0 as analysis_id, CAST('@source_name' AS VARCHAR(255)) as stratum_1, CAST('@achilles_version' AS VARCHAR(255)) as stratum_2, CAST(GETDATE() AS VARCHAR(255)) as stratum_3,COUNT_BIG(distinct person_id) as count_value -from @cdm_database_schema.PERSON; - -insert into @results_database_schema.ACHILLES_results_dist (analysis_id, stratum_1, count_value) -select 0 as analysis_id, CAST('@source_name' AS VARCHAR(255)) as stratum_1, COUNT_BIG(distinct person_id) as count_value -from @cdm_database_schema.PERSON; - ---} - - -/******************************************** - -ACHILLES Analyses on PERSON table - -*********************************************/ - - - ---{1 IN (@list_of_analysis_ids)}?{ --- 1 Number of persons -insert into @results_database_schema.ACHILLES_results (analysis_id, count_value) -select 1 as analysis_id, COUNT_BIG(distinct person_id) as count_value -from @cdm_database_schema.PERSON; ---} - - ---{2 IN (@list_of_analysis_ids)}?{ --- 2 Number of persons by gender -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, count_value) -select 2 as analysis_id, CAST(gender_concept_id AS VARCHAR(255)) as stratum_1, COUNT_BIG(distinct person_id) as count_value -from @cdm_database_schema.PERSON -group by GENDER_CONCEPT_ID; ---} - - - ---{3 IN (@list_of_analysis_ids)}?{ --- 3 Number of persons by year of birth -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, count_value) -select 3 as analysis_id, CAST(year_of_birth AS VARCHAR(255)) as stratum_1, COUNT_BIG(distinct person_id) as count_value -from @cdm_database_schema.PERSON -group by YEAR_OF_BIRTH; ---} - - ---{4 IN (@list_of_analysis_ids)}?{ --- 4 Number of persons by race -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, count_value) -select 4 as analysis_id, CAST(RACE_CONCEPT_ID AS VARCHAR(255)) as stratum_1, COUNT_BIG(distinct person_id) as count_value -from @cdm_database_schema.PERSON -group by RACE_CONCEPT_ID; ---} - - - ---{5 IN (@list_of_analysis_ids)}?{ --- 5 Number of persons by ethnicity -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, count_value) -select 5 as analysis_id, CAST(ETHNICITY_CONCEPT_ID AS VARCHAR(255)) as stratum_1, COUNT_BIG(distinct person_id) as count_value -from @cdm_database_schema.PERSON -group by ETHNICITY_CONCEPT_ID; ---} - - - - - ---{7 IN (@list_of_analysis_ids)}?{ --- 7 Number of persons with invalid provider_id -insert into @results_database_schema.ACHILLES_results (analysis_id, count_value) -select 7 as analysis_id, COUNT_BIG(p1.person_id) as count_value -from @cdm_database_schema.PERSON p1 - left join @cdm_database_schema.provider pr1 - on p1.provider_id = pr1.provider_id -where p1.provider_id is not null - and pr1.provider_id is null -; ---} - - - ---{8 IN (@list_of_analysis_ids)}?{ --- 8 Number of persons with invalid location_id -insert into @results_database_schema.ACHILLES_results (analysis_id, count_value) -select 8 as analysis_id, COUNT_BIG(p1.person_id) as count_value -from 
@cdm_database_schema.PERSON p1 - left join @cdm_database_schema.location l1 - on p1.location_id = l1.location_id -where p1.location_id is not null - and l1.location_id is null -; ---} - - ---{9 IN (@list_of_analysis_ids)}?{ --- 9 Number of persons with invalid care_site_id -insert into @results_database_schema.ACHILLES_results (analysis_id, count_value) -select 9 as analysis_id, COUNT_BIG(p1.person_id) as count_value -from @cdm_database_schema.PERSON p1 - left join @cdm_database_schema.care_site cs1 - on p1.care_site_id = cs1.care_site_id -where p1.care_site_id is not null - and cs1.care_site_id is null -; ---} - - - ---{10 IN (@list_of_analysis_ids)}?{ --- 10 Number of all persons by year of birth and by gender -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, stratum_2, count_value) -select 10 as analysis_id, CAST(year_of_birth AS VARCHAR(255)) as stratum_1, - CAST(gender_concept_id AS VARCHAR(255)) as stratum_2, - COUNT_BIG(distinct person_id) as count_value -from @cdm_database_schema.PERSON -group by YEAR_OF_BIRTH, gender_concept_id; ---} - - ---{11 IN (@list_of_analysis_ids)}?{ --- 11 Number of non-deceased persons by year of birth and by gender -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, stratum_2, count_value) -select 11 as analysis_id, CAST(year_of_birth AS VARCHAR(255)) as stratum_1, - CAST(gender_concept_id AS VARCHAR(255)) as stratum_2, - COUNT_BIG(distinct person_id) as count_value -from @cdm_database_schema.PERSON -where person_id not in (select person_id from @cdm_database_schema.DEATH) -group by YEAR_OF_BIRTH, gender_concept_id; ---} - - - ---{12 IN (@list_of_analysis_ids)}?{ --- 12 Number of persons by race and ethnicity -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, stratum_2, count_value) -select 12 as analysis_id, CAST(RACE_CONCEPT_ID AS VARCHAR(255)) as stratum_1, CAST(ETHNICITY_CONCEPT_ID AS VARCHAR(255)) as stratum_2, COUNT_BIG(distinct person_id) as count_value -from @cdm_database_schema.PERSON -group by RACE_CONCEPT_ID,ETHNICITY_CONCEPT_ID; ---} - -/******************************************** - -ACHILLES Analyses on OBSERVATION_PERIOD table - -*********************************************/ - ---{101 IN (@list_of_analysis_ids)}?{ --- 101 Number of persons by age, with age at first observation period -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, count_value) -select 101 as analysis_id, CAST(year(op1.index_date) - p1.YEAR_OF_BIRTH AS VARCHAR(255)) as stratum_1, COUNT_BIG(p1.person_id) as count_value -from @cdm_database_schema.PERSON p1 - inner join (select person_id, MIN(observation_period_start_date) as index_date from @cdm_database_schema.OBSERVATION_PERIOD group by PERSON_ID) op1 - on p1.PERSON_ID = op1.PERSON_ID -group by year(op1.index_date) - p1.YEAR_OF_BIRTH; ---} - - - ---{102 IN (@list_of_analysis_ids)}?{ --- 102 Number of persons by gender by age, with age at first observation period -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, stratum_2, count_value) -select 102 as analysis_id, CAST(p1.gender_concept_id AS VARCHAR(255)) as stratum_1, CAST(year(op1.index_date) - p1.YEAR_OF_BIRTH AS VARCHAR(255)) as stratum_2, COUNT_BIG(p1.person_id) as count_value -from @cdm_database_schema.PERSON p1 - inner join (select person_id, MIN(observation_period_start_date) as index_date from @cdm_database_schema.OBSERVATION_PERIOD group by PERSON_ID) op1 - on p1.PERSON_ID = op1.PERSON_ID -group by 
p1.gender_concept_id, year(op1.index_date) - p1.YEAR_OF_BIRTH; ---} - - ---{103 IN (@list_of_analysis_ids)}?{ --- 103 Distribution of age at first observation period -with rawData (person_id, age_value) as -( -select p.person_id, - MIN(YEAR(observation_period_start_date)) - P.YEAR_OF_BIRTH as age_value - from @cdm_database_schema.PERSON p - JOIN @cdm_database_schema.OBSERVATION_PERIOD op on p.person_id = op.person_id - group by p.person_id, p.year_of_birth -), -overallStats (avg_value, stdev_value, min_value, max_value, total) as -( - select CAST(avg(1.0 * age_value) AS FLOAT) as avg_value, - CAST(stdev(age_value) AS FLOAT) as stdev_value, - min(age_value) as min_value, - max(age_value) as max_value, - count_big(*) as total - FROM rawData -), -ageStats (age_value, total, rn) as -( - select age_value, count_big(*) as total, row_number() over (order by age_value) as rn - from rawData - group by age_value -), -ageStatsPrior (age_value, total, accumulated) as -( - select s.age_value, s.total, sum(p.total) as accumulated - from ageStats s - join ageStats p on p.rn <= s.rn - group by s.age_value, s.total, s.rn -) -select 103 as analysis_id, - o.total as count_value, - o.min_value, - o.max_value, - o.avg_value, - o.stdev_value, - MIN(case when p.accumulated >= .50 * o.total then age_value end) as median_value, - MIN(case when p.accumulated >= .10 * o.total then age_value end) as p10_value, - MIN(case when p.accumulated >= .25 * o.total then age_value end) as p25_value, - MIN(case when p.accumulated >= .75 * o.total then age_value end) as p75_value, - MIN(case when p.accumulated >= .90 * o.total then age_value end) as p90_value -INTO #tempResults -from ageStatsPrior p -CROSS JOIN overallStats o -GROUP BY o.total, o.min_value, o.max_value, o.avg_value, o.stdev_value -; - -insert into @results_database_schema.ACHILLES_results_dist (analysis_id, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value) -select analysis_id, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value -from #tempResults -; - -truncate table #tempResults; -drop table #tempResults; ---} - - - ---{104 IN (@list_of_analysis_ids)}?{ --- 104 Distribution of age at first observation period by gender -with rawData (gender_concept_id, age_value) as -( - select p.gender_concept_id, MIN(YEAR(observation_period_start_date)) - P.YEAR_OF_BIRTH as age_value - from @cdm_database_schema.PERSON p - JOIN @cdm_database_schema.OBSERVATION_PERIOD op on p.person_id = op.person_id - group by p.person_id,p.gender_concept_id, p.year_of_birth -), -overallStats (gender_concept_id, avg_value, stdev_value, min_value, max_value, total) as -( - select gender_concept_id, - CAST(avg(1.0 * age_value) AS FLOAT) as avg_value, - CAST(stdev(age_value) AS FLOAT) as stdev_value, - min(age_value) as min_value, - max(age_value) as max_value, - count_big(*) as total - FROM rawData - group by gender_concept_id -), -ageStats (gender_concept_id, age_value, total, rn) as -( - select gender_concept_id, age_value, count_big(*) as total, row_number() over (order by age_value) as rn - FROM rawData - group by gender_concept_id, age_value -), -ageStatsPrior (gender_concept_id, age_value, total, accumulated) as -( - select s.gender_concept_id, s.age_value, s.total, sum(p.total) as accumulated - from ageStats s - join ageStats p on s.gender_concept_id = p.gender_concept_id and p.rn <= s.rn - group by s.gender_concept_id, s.age_value, s.total, s.rn -) -select 104 
as analysis_id, - CAST(o.gender_concept_id AS VARCHAR(255)) as stratum_1, - o.total as count_value, - o.min_value, - o.max_value, - o.avg_value, - o.stdev_value, - MIN(case when p.accumulated >= .50 * o.total then age_value end) as median_value, - MIN(case when p.accumulated >= .10 * o.total then age_value end) as p10_value, - MIN(case when p.accumulated >= .25 * o.total then age_value end) as p25_value, - MIN(case when p.accumulated >= .75 * o.total then age_value end) as p75_value, - MIN(case when p.accumulated >= .90 * o.total then age_value end) as p90_value -INTO #tempResults -from ageStatsPrior p -join overallStats o on p.gender_concept_id = o.gender_concept_id -GROUP BY o.gender_concept_id, o.total, o.min_value, o.max_value, o.avg_value, o.stdev_value -; - -insert into @results_database_schema.ACHILLES_results_dist (analysis_id, stratum_1, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value) -select analysis_id, stratum_1, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value -from #tempResults -; - -truncate table #tempResults; -drop table #tempResults; ---} - ---{105 IN (@list_of_analysis_ids)}?{ --- 105 Length of observation (days) of first observation period -with rawData (count_value) as -( - select count_value - FROM - ( - select DATEDIFF(dd,op.observation_period_start_date, op.observation_period_end_date) as count_value, - ROW_NUMBER() over (PARTITION by op.person_id order by op.observation_period_start_date asc) as rn - from @cdm_database_schema.OBSERVATION_PERIOD op - ) op - where op.rn = 1 -), -overallStats (avg_value, stdev_value, min_value, max_value, total) as -( - select CAST(avg(1.0 * count_value) AS FLOAT) as avg_value, - CAST(stdev(count_value) AS FLOAT) as stdev_value, - min(count_value) as min_value, - max(count_value) as max_value, - count_big(*) as total - from rawData -), -statsView (count_value, total, rn) as -( - select count_value, count_big(*) as total, row_number() over (order by count_value) as rn - FROM - ( - select DATEDIFF(dd,op.observation_period_start_date, op.observation_period_end_date) as count_value, - ROW_NUMBER() over (PARTITION by op.person_id order by op.observation_period_start_date asc) as rn - from @cdm_database_schema.OBSERVATION_PERIOD op - ) op - where op.rn = 1 - group by count_value -), -priorStats (count_value, total, accumulated) as -( - select s.count_value, s.total, sum(p.total) as accumulated - from statsView s - join statsView p on p.rn <= s.rn - group by s.count_value, s.total, s.rn -) -select 105 as analysis_id, - o.total as count_value, - o.min_value, - o.max_value, - o.avg_value, - o.stdev_value, - MIN(case when p.accumulated >= .50 * o.total then count_value end) as median_value, - MIN(case when p.accumulated >= .10 * o.total then count_value end) as p10_value, - MIN(case when p.accumulated >= .25 * o.total then count_value end) as p25_value, - MIN(case when p.accumulated >= .75 * o.total then count_value end) as p75_value, - MIN(case when p.accumulated >= .90 * o.total then count_value end) as p90_value -into #tempResults -from priorStats p -CROSS JOIN overallStats o -GROUP BY o.total, o.min_value, o.max_value, o.avg_value, o.stdev_value -; - -insert into @results_database_schema.ACHILLES_results_dist (analysis_id, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value) -select analysis_id, count_value, min_value, max_value, avg_value, 
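-- The distribution analyses (103, 104, 105 above, and the later _dist analyses) compute the
-- median and percentiles without a built-in percentile function: one CTE gathers the overall
-- count/min/max/avg/stdev, another counts each distinct value, a self-join accumulates those
-- counts in value order, and the p-th percentile is the smallest value whose accumulated
-- count reaches p percent of the total. A condensed sketch of just the percentile step,
-- using a hypothetical #rawData temp table standing in for the rawData CTEs above:
with freq (count_value, total, rn) as
(
  select count_value,
         count_big(*) as total,
         row_number() over (order by count_value) as rn
  from #rawData
  group by count_value
),
cumulativeFreq (count_value, accumulated) as
(
  select s.count_value, sum(p.total) as accumulated
  from freq s
  join freq p on p.rn <= s.rn
  group by s.count_value
)
select min(case when c.accumulated >= 0.50 * t.grand_total then c.count_value end) as median_value,
       min(case when c.accumulated >= 0.90 * t.grand_total then c.count_value end) as p90_value
from cumulativeFreq c
cross join (select count_big(*) as grand_total from #rawData) t;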
stdev_value, median_value, p10_value, p25_value, p75_value, p90_value -from #tempResults -; - -truncate table #tempResults; -drop table #tempResults; ---} - - ---{106 IN (@list_of_analysis_ids)}?{ --- 106 Length of observation (days) of first observation period by gender -with rawData(gender_concept_id, count_value) as -( - select p.gender_concept_id, op.count_value - FROM - ( - select person_id, DATEDIFF(dd,op.observation_period_start_date, op.observation_period_end_date) as count_value, - ROW_NUMBER() over (PARTITION by op.person_id order by op.observation_period_start_date asc) as rn - from @cdm_database_schema.OBSERVATION_PERIOD op - ) op - JOIN @cdm_database_schema.PERSON p on op.person_id = p.person_id - where op.rn = 1 -), -overallStats (gender_concept_id, avg_value, stdev_value, min_value, max_value, total) as -( - select gender_concept_id, - CAST(avg(1.0 * count_value) AS FLOAT) as avg_value, - CAST(stdev(count_value) AS FLOAT) as stdev_value, - min(count_value) as min_value, - max(count_value) as max_value, - count_big(*) as total - FROM rawData - group by gender_concept_id -), -statsView (gender_concept_id, count_value, total, rn) as -( - select gender_concept_id, count_value, count_big(*) as total, row_number() over (order by count_value) as rn - FROM rawData - group by gender_concept_id, count_value -), -priorStats (gender_concept_id,count_value, total, accumulated) as -( - select s.gender_concept_id, s.count_value, s.total, sum(p.total) as accumulated - from statsView s - join statsView p on s.gender_concept_id = p.gender_concept_id and p.rn <= s.rn - group by s.gender_concept_id, s.count_value, s.total, s.rn -) -select 106 as analysis_id, - CAST(o.gender_concept_id AS VARCHAR(255)) as gender_concept_id, - o.total as count_value, - o.min_value, - o.max_value, - o.avg_value, - o.stdev_value, - MIN(case when p.accumulated >= .50 * o.total then count_value end) as median_value, - MIN(case when p.accumulated >= .10 * o.total then count_value end) as p10_value, - MIN(case when p.accumulated >= .25 * o.total then count_value end) as p25_value, - MIN(case when p.accumulated >= .75 * o.total then count_value end) as p75_value, - MIN(case when p.accumulated >= .90 * o.total then count_value end) as p90_value -INTO #tempResults -from priorStats p -join overallStats o on p.gender_concept_id = o.gender_concept_id -GROUP BY o.gender_concept_id, o.total, o.min_value, o.max_value, o.avg_value, o.stdev_value -; - -insert into @results_database_schema.ACHILLES_results_dist (analysis_id, stratum_1, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value) -select analysis_id, gender_concept_id, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value -FROM #tempResults -; - -truncate table #tempResults; -drop table #tempResults; - ---} - ---{107 IN (@list_of_analysis_ids)}?{ --- 107 Length of observation (days) of first observation period by age decile - -with rawData (age_decile, count_value) as -( - select floor((year(op.OBSERVATION_PERIOD_START_DATE) - p.YEAR_OF_BIRTH)/10) as age_decile, - DATEDIFF(dd,op.observation_period_start_date, op.observation_period_end_date) as count_value - FROM - ( - select person_id, - op.observation_period_start_date, - op.observation_period_end_date, - ROW_NUMBER() over (PARTITION by op.person_id order by op.observation_period_start_date asc) as rn - from @cdm_database_schema.OBSERVATION_PERIOD op - ) op - JOIN @cdm_database_schema.PERSON p on 
op.person_id = p.person_id - where op.rn = 1 -), -overallStats (age_decile, avg_value, stdev_value, min_value, max_value, total) as -( - select age_decile, - CAST(avg(1.0 * count_value) AS FLOAT) as avg_value, - CAST(stdev(count_value) AS FLOAT) as stdev_value, - min(count_value) as min_value, - max(count_value) as max_value, - count_big(*) as total - from rawData - group by age_decile -), -statsView (age_decile, count_value, total, rn) as -( - select age_decile, - count_value, - count_big(*) as total, - row_number() over (order by count_value) as rn - FROM rawData - group by age_decile, count_value -), -priorStats (age_decile,count_value, total, accumulated) as -( - select s.age_decile, s.count_value, s.total, sum(p.total) as accumulated - from statsView s - join statsView p on s.age_decile = p.age_decile and p.rn <= s.rn - group by s.age_decile, s.count_value, s.total, s.rn -) -select 107 as analysis_id, - CAST(o.age_decile AS VARCHAR(255)) as age_decile, - o.total as count_value, - o.min_value, - o.max_value, - o.avg_value, - o.stdev_value, - MIN(case when p.accumulated >= .50 * o.total then count_value else o.max_value end) as median_value, - MIN(case when p.accumulated >= .10 * o.total then count_value else o.max_value end) as p10_value, - MIN(case when p.accumulated >= .25 * o.total then count_value else o.max_value end) as p25_value, - MIN(case when p.accumulated >= .75 * o.total then count_value else o.max_value end) as p75_value, - MIN(case when p.accumulated >= .90 * o.total then count_value else o.max_value end) as p90_value -into #tempResults -from priorStats p -join overallStats o on p.age_decile = o.age_decile -GROUP BY o.age_decile, o.total, o.min_value, o.max_value, o.avg_value, o.stdev_value -; - -insert into @results_database_schema.ACHILLES_results_dist (analysis_id, stratum_1, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value) -select analysis_id, age_decile, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value -FROM #tempResults -; - -truncate table #tempResults; -drop table #tempResults; - ---} - - ---{108 IN (@list_of_analysis_ids)}?{ --- 108 Number of persons by length of observation period, in 30d increments -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, count_value) -select 108 as analysis_id, CAST(floor(DATEDIFF(dd, op1.observation_period_start_date, op1.observation_period_end_date)/30) AS VARCHAR(255)) as stratum_1, COUNT_BIG(distinct p1.person_id) as count_value -from @cdm_database_schema.PERSON p1 - inner join - (select person_id, - OBSERVATION_PERIOD_START_DATE, - OBSERVATION_PERIOD_END_DATE, - ROW_NUMBER() over (PARTITION by person_id order by observation_period_start_date asc) as rn1 - from @cdm_database_schema.OBSERVATION_PERIOD - ) op1 - on p1.PERSON_ID = op1.PERSON_ID - where op1.rn1 = 1 -group by floor(DATEDIFF(dd, op1.observation_period_start_date, op1.observation_period_end_date)/30) -; ---} - - - - ---{109 IN (@list_of_analysis_ids)}?{ --- 109 Number of persons with continuous observation in each year --- Note: using temp table instead of nested query because this gives vastly improved performance in Oracle - -IF OBJECT_ID('tempdb..#temp_dates', 'U') IS NOT NULL - DROP TABLE #temp_dates; - -SELECT DISTINCT - YEAR(observation_period_start_date) AS obs_year, - DATEFROMPARTS(YEAR(observation_period_start_date), 1, 1) AS obs_year_start, - DATEFROMPARTS(YEAR(observation_period_start_date), 12, 
31) AS obs_year_end -INTO - #temp_dates -FROM @cdm_database_schema.observation_period -; - -INSERT INTO @results_database_schema.ACHILLES_results (analysis_id, stratum_1, count_value) -SELECT - 109 AS analysis_id, - CAST(obs_year AS VARCHAR(255)) AS stratum_1, - COUNT_BIG(DISTINCT person_id) AS count_value -FROM @cdm_database_schema.observation_period, - #temp_dates -WHERE - observation_period_start_date <= obs_year_start - AND - observation_period_end_date >= obs_year_end -GROUP BY - obs_year -; - -TRUNCATE TABLE #temp_dates; -DROP TABLE #temp_dates; ---} - - ---{110 IN (@list_of_analysis_ids)}?{ --- 110 Number of persons with continuous observation in each month --- Note: using temp table instead of nested query because this gives vastly improved performance in Oracle - -IF OBJECT_ID('tempdb..#temp_dates', 'U') IS NOT NULL - DROP TABLE #temp_dates; - -SELECT DISTINCT - YEAR(observation_period_start_date)*100 + MONTH(observation_period_start_date) AS obs_month, - DATEFROMPARTS(YEAR(observation_period_start_date), MONTH(OBSERVATION_PERIOD_START_DATE), 1) AS obs_month_start, - EOMONTH(observation_period_start_date) AS obs_month_end -INTO - #temp_dates -FROM @cdm_database_schema.observation_period -; - - -INSERT INTO @results_database_schema.ACHILLES_results (analysis_id, stratum_1, count_value) -SELECT - 110 AS analysis_id, - CAST(obs_month AS VARCHAR(255)) as stratum_1, - COUNT_BIG(DISTINCT person_id) AS count_value -FROM - @cdm_database_schema.observation_period, - #temp_Dates -WHERE - observation_period_start_date <= obs_month_start - AND - observation_period_end_date >= obs_month_end -GROUP BY - obs_month -; - -TRUNCATE TABLE #temp_dates; -DROP TABLE #temp_dates; ---} - - - ---{111 IN (@list_of_analysis_ids)}?{ --- 111 Number of persons by observation period start month -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, count_value) -select 111 as analysis_id, - CAST(YEAR(observation_period_start_date)*100 + month(OBSERVATION_PERIOD_START_DATE) AS VARCHAR(255)) as stratum_1, - COUNT_BIG(distinct op1.PERSON_ID) as count_value -from - @cdm_database_schema.observation_period op1 -group by YEAR(observation_period_start_date)*100 + month(OBSERVATION_PERIOD_START_DATE) -; ---} - - - ---{112 IN (@list_of_analysis_ids)}?{ --- 112 Number of persons by observation period end month -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, count_value) -select 112 as analysis_id, - CAST(YEAR(observation_period_end_date)*100 + month(observation_period_end_date) AS VARCHAR(255)) as stratum_1, - COUNT_BIG(distinct op1.PERSON_ID) as count_value -from - @cdm_database_schema.observation_period op1 -group by YEAR(observation_period_end_date)*100 + month(observation_period_end_date) -; ---} - - ---{113 IN (@list_of_analysis_ids)}?{ --- 113 Number of persons by number of observation periods -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, count_value) -select 113 as analysis_id, - CAST(op1.num_periods AS VARCHAR(255)) as stratum_1, COUNT_BIG(distinct op1.PERSON_ID) as count_value -from - (select person_id, COUNT_BIG(OBSERVATION_period_start_date) as num_periods from @cdm_database_schema.OBSERVATION_PERIOD group by PERSON_ID) op1 -group by op1.num_periods -; ---} - ---{114 IN (@list_of_analysis_ids)}?{ --- 114 Number of persons with observation period before year-of-birth -insert into @results_database_schema.ACHILLES_results (analysis_id, count_value) -select 114 as analysis_id, - COUNT_BIG(distinct p1.PERSON_ID) as 
count_value -from - @cdm_database_schema.PERSON p1 - inner join (select person_id, MIN(year(OBSERVATION_period_start_date)) as first_obs_year from @cdm_database_schema.OBSERVATION_PERIOD group by PERSON_ID) op1 - on p1.person_id = op1.person_id -where p1.year_of_birth > op1.first_obs_year -; ---} - ---{115 IN (@list_of_analysis_ids)}?{ --- 115 Number of persons with observation period end < start -insert into @results_database_schema.ACHILLES_results (analysis_id, count_value) -select 115 as analysis_id, - COUNT_BIG(op1.PERSON_ID) as count_value -from - @cdm_database_schema.observation_period op1 -where op1.observation_period_end_date < op1.observation_period_start_date -; ---} - - - ---{116 IN (@list_of_analysis_ids)}?{ --- 116 Number of persons with at least one day of observation in each year by gender and age decile --- Note: using temp table instead of nested query because this gives vastly improved performance in Oracle - -IF OBJECT_ID('tempdb..#temp_dates', 'U') IS NOT NULL - DROP TABLE #temp_dates; - -select distinct - YEAR(observation_period_start_date) as obs_year -INTO - #temp_dates -from - @cdm_database_schema.OBSERVATION_PERIOD -; - -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, stratum_2, stratum_3, count_value) -select 116 as analysis_id, - CAST(t1.obs_year AS VARCHAR(255)) as stratum_1, - CAST(p1.gender_concept_id AS VARCHAR(255)) as stratum_2, - CAST(floor((t1.obs_year - p1.year_of_birth)/10) AS VARCHAR(255)) as stratum_3, - COUNT_BIG(distinct p1.PERSON_ID) as count_value -from - @cdm_database_schema.PERSON p1 - inner join - @cdm_database_schema.observation_period op1 - on p1.person_id = op1.person_id - , - #temp_dates t1 -where year(op1.OBSERVATION_PERIOD_START_DATE) <= t1.obs_year - and year(op1.OBSERVATION_PERIOD_END_DATE) >= t1.obs_year -group by t1.obs_year, - p1.gender_concept_id, - floor((t1.obs_year - p1.year_of_birth)/10) -; - -TRUNCATE TABLE #temp_dates; -DROP TABLE #temp_dates; ---} - - ---{117 IN (@list_of_analysis_ids)}?{ --- 117 Number of persons with at least one day of observation in each year by gender and age decile --- Note: using temp table instead of nested query because this gives vastly improved performance in Oracle - -IF OBJECT_ID('tempdb..#temp_dates', 'U') IS NOT NULL - DROP TABLE #temp_dates; - -select distinct - YEAR(observation_period_start_date)*100 + MONTH(observation_period_start_date) as obs_month -into - #temp_dates -from - @cdm_database_schema.OBSERVATION_PERIOD -; - -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, count_value) -select 117 as analysis_id, - CAST(t1.obs_month AS VARCHAR(255)) as stratum_1, - COUNT_BIG(distinct op1.PERSON_ID) as count_value -from - @cdm_database_schema.observation_period op1, - #temp_dates t1 -where YEAR(observation_period_start_date)*100 + MONTH(observation_period_start_date) <= t1.obs_month - and YEAR(observation_period_end_date)*100 + MONTH(observation_period_end_date) >= t1.obs_month -group by t1.obs_month -; - -TRUNCATE TABLE #temp_dates; -DROP TABLE #temp_dates; ---} - - ---{118 IN (@list_of_analysis_ids)}?{ --- 118 Number of observation period records with invalid person_id -insert into @results_database_schema.ACHILLES_results (analysis_id, count_value) -select 118 as analysis_id, - COUNT_BIG(op1.PERSON_ID) as count_value -from - @cdm_database_schema.observation_period op1 - left join @cdm_database_schema.PERSON p1 - on p1.person_id = op1.person_id -where p1.person_id is null -; ---} - ---{119 IN (@list_of_analysis_ids)}?{ --- 119 
Number of observation period records by period_type_concept_id -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1,count_value) -select 119 as analysis_id, - CAST(op1.period_type_concept_id AS VARCHAR(255)) as stratum_1, - COUNT_BIG(*) as count_value -from - @cdm_database_schema.observation_period op1 -group by op1.period_type_concept_id -; ---} - - -/******************************************** - -ACHILLES Analyses on VISIT_OCCURRENCE table - -*********************************************/ - - ---{200 IN (@list_of_analysis_ids)}?{ --- 200 Number of persons with at least one visit occurrence, by visit_concept_id -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, count_value) -select 200 as analysis_id, - CAST(vo1.visit_concept_id AS VARCHAR(255)) as stratum_1, - COUNT_BIG(distinct vo1.PERSON_ID) as count_value -from - @cdm_database_schema.visit_occurrence vo1 -group by vo1.visit_concept_id -; ---} - - ---{201 IN (@list_of_analysis_ids)}?{ --- 201 Number of visit occurrence records, by visit_concept_id -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, count_value) -select 201 as analysis_id, - CAST(vo1.visit_concept_id AS VARCHAR(255)) as stratum_1, - COUNT_BIG(vo1.PERSON_ID) as count_value -from - @cdm_database_schema.visit_occurrence vo1 -group by vo1.visit_concept_id -; ---} - - - ---{202 IN (@list_of_analysis_ids)}?{ --- 202 Number of persons by visit occurrence start month, by visit_concept_id -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, stratum_2, count_value) -select 202 as analysis_id, - CAST(vo1.visit_concept_id AS VARCHAR(255)) as stratum_1, - CAST(YEAR(visit_start_date)*100 + month(visit_start_date) AS VARCHAR(255)) as stratum_2, - COUNT_BIG(distinct PERSON_ID) as count_value -from -@cdm_database_schema.visit_occurrence vo1 -group by vo1.visit_concept_id, - YEAR(visit_start_date)*100 + month(visit_start_date) -; ---} - - - ---{203 IN (@list_of_analysis_ids)}?{ --- 203 Number of distinct visit occurrence concepts per person - -with rawData(person_id, count_value) as -( - select vo1.person_id, COUNT_BIG(distinct vo1.visit_concept_id) as count_value - from @cdm_database_schema.visit_occurrence vo1 - group by vo1.person_id -), -overallStats (avg_value, stdev_value, min_value, max_value, total) as -( - select CAST(avg(1.0 * count_value) AS FLOAT) as avg_value, - CAST(stdev(count_value) AS FLOAT) as stdev_value, - min(count_value) as min_value, - max(count_value) as max_value, - count_big(*) as total - from rawData -), -statsView (count_value, total, rn) as -( - select count_value, - count_big(*) as total, - row_number() over (order by count_value) as rn - FROM rawData - group by count_value -), -priorStats (count_value, total, accumulated) as -( - select s.count_value, s.total, sum(p.total) as accumulated - from statsView s - join statsView p on p.rn <= s.rn - group by s.count_value, s.total, s.rn -) -select 203 as analysis_id, - o.total as count_value, - o.min_value, - o.max_value, - o.avg_value, - o.stdev_value, - MIN(case when p.accumulated >= .50 * o.total then count_value else o.max_value end) as median_value, - MIN(case when p.accumulated >= .10 * o.total then count_value else o.max_value end) as p10_value, - MIN(case when p.accumulated >= .25 * o.total then count_value else o.max_value end) as p25_value, - MIN(case when p.accumulated >= .75 * o.total then count_value else o.max_value end) as p75_value, - MIN(case when p.accumulated >= .90 * o.total then 
count_value else o.max_value end) as p90_value -INTO #tempResults -from priorStats p -CROSS JOIN overallStats o -GROUP BY o.total, o.min_value, o.max_value, o.avg_value, o.stdev_value -; - -insert into @results_database_schema.ACHILLES_results_dist (analysis_id, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value) -select analysis_id, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value -FROM #tempResults -; - -truncate table #tempResults; -drop table #tempResults; - ---} - - - ---{204 IN (@list_of_analysis_ids)}?{ --- 204 Number of persons with at least one visit occurrence, by visit_concept_id by calendar year by gender by age decile -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, stratum_2, stratum_3, stratum_4, count_value) -select 204 as analysis_id, - CAST(vo1.visit_concept_id AS VARCHAR(255)) as stratum_1, - CAST(YEAR(visit_start_date) AS VARCHAR(255)) as stratum_2, - CAST(p1.gender_concept_id AS VARCHAR(255)) as stratum_3, - CAST(floor((year(visit_start_date) - p1.year_of_birth)/10) AS VARCHAR(255)) as stratum_4, - COUNT_BIG(distinct p1.PERSON_ID) as count_value -from @cdm_database_schema.PERSON p1 -inner join -@cdm_database_schema.visit_occurrence vo1 -on p1.person_id = vo1.person_id -group by vo1.visit_concept_id, - YEAR(visit_start_date), - p1.gender_concept_id, - floor((year(visit_start_date) - p1.year_of_birth)/10) -; ---} - - - - - ---{206 IN (@list_of_analysis_ids)}?{ --- 206 Distribution of age by visit_concept_id - -with rawData(stratum1_id, stratum2_id, count_value) as -( - select vo1.visit_concept_id, - p1.gender_concept_id, - vo1.visit_start_year - p1.year_of_birth as count_value - from @cdm_database_schema.PERSON p1 - inner join - ( - select person_id, visit_concept_id, min(year(visit_start_date)) as visit_start_year - from @cdm_database_schema.visit_occurrence - group by person_id, visit_concept_id - ) vo1 on p1.person_id = vo1.person_id -), -overallStats (stratum1_id, stratum2_id, avg_value, stdev_value, min_value, max_value, total) as -( - select stratum1_id, - stratum2_id, - CAST(avg(1.0 * count_value) AS FLOAT) as avg_value, - CAST(stdev(count_value) AS FLOAT) as stdev_value, - min(count_value) as min_value, - max(count_value) as max_value, - count_big(*) as total - FROM rawData - group by stratum1_id, stratum2_id -), -statsView (stratum1_id, stratum2_id, count_value, total, rn) as -( - select stratum1_id, stratum2_id, count_value, count_big(*) as total, row_number() over (partition by stratum1_id, stratum2_id order by count_value) as rn - FROM rawData - group by stratum1_id, stratum2_id, count_value -), -priorStats (stratum1_id, stratum2_id, count_value, total, accumulated) as -( - select s.stratum1_id, s.stratum2_id, s.count_value, s.total, sum(p.total) as accumulated - from statsView s - join statsView p on s.stratum1_id = p.stratum1_id and s.stratum2_id = p.stratum2_id and p.rn <= s.rn - group by s.stratum1_id, s.stratum2_id, s.count_value, s.total, s.rn -) -select 206 as analysis_id, - CAST(o.stratum1_id AS VARCHAR(255)) AS stratum1_id, - CAST(o.stratum2_id AS VARCHAR(255)) AS stratum2_id, - o.total as count_value, - o.min_value, - o.max_value, - o.avg_value, - o.stdev_value, - MIN(case when p.accumulated >= .50 * o.total then count_value else o.max_value end) as median_value, - MIN(case when p.accumulated >= .10 * o.total then count_value else o.max_value end) as p10_value, - MIN(case when 
p.accumulated >= .25 * o.total then count_value else o.max_value end) as p25_value, - MIN(case when p.accumulated >= .75 * o.total then count_value else o.max_value end) as p75_value, - MIN(case when p.accumulated >= .90 * o.total then count_value else o.max_value end) as p90_value -into #tempResults -from priorStats p -join overallStats o on p.stratum1_id = o.stratum1_id and p.stratum2_id = o.stratum2_id -GROUP BY o.stratum1_id, o.stratum2_id, o.total, o.min_value, o.max_value, o.avg_value, o.stdev_value -; - -insert into @results_database_schema.ACHILLES_results_dist (analysis_id, stratum_1, stratum_2, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value) -select analysis_id, stratum1_id, stratum2_id, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value -from #tempResults -; - -truncate table #tempResults; -drop table #tempResults; - ---} - - ---{207 IN (@list_of_analysis_ids)}?{ ---207 Number of visit records with invalid person_id -insert into @results_database_schema.ACHILLES_results (analysis_id, count_value) -select 207 as analysis_id, - COUNT_BIG(vo1.PERSON_ID) as count_value -from - @cdm_database_schema.visit_occurrence vo1 - left join @cdm_database_schema.PERSON p1 - on p1.person_id = vo1.person_id -where p1.person_id is null -; ---} - - ---{208 IN (@list_of_analysis_ids)}?{ ---208 Number of visit records outside valid observation period -insert into @results_database_schema.ACHILLES_results (analysis_id, count_value) -select 208 as analysis_id, - COUNT_BIG(vo1.PERSON_ID) as count_value -from - @cdm_database_schema.visit_occurrence vo1 - left join @cdm_database_schema.observation_period op1 - on op1.person_id = vo1.person_id - and vo1.visit_start_date >= op1.observation_period_start_date - and vo1.visit_start_date <= op1.observation_period_end_date -where op1.person_id is null -; ---} - ---{209 IN (@list_of_analysis_ids)}?{ ---209 Number of visit records with end date < start date -insert into @results_database_schema.ACHILLES_results (analysis_id, count_value) -select 209 as analysis_id, - COUNT_BIG(vo1.PERSON_ID) as count_value -from - @cdm_database_schema.visit_occurrence vo1 -where visit_end_date < visit_start_date -; ---} - ---{210 IN (@list_of_analysis_ids)}?{ ---210 Number of visit records with invalid care_site_id -insert into @results_database_schema.ACHILLES_results (analysis_id, count_value) -select 210 as analysis_id, - COUNT_BIG(vo1.PERSON_ID) as count_value -from - @cdm_database_schema.visit_occurrence vo1 - left join @cdm_database_schema.care_site cs1 - on vo1.care_site_id = cs1.care_site_id -where vo1.care_site_id is not null - and cs1.care_site_id is null -; ---} - - ---{211 IN (@list_of_analysis_ids)}?{ --- 211 Distribution of length of stay by visit_concept_id -with rawData(stratum_id, count_value) as -( - select visit_concept_id, datediff(dd,visit_start_date,visit_end_date) as count_value - from @cdm_database_schema.visit_occurrence -), -overallStats (stratum_id, avg_value, stdev_value, min_value, max_value, total) as -( - select stratum_id, - CAST(avg(1.0 * count_value) AS FLOAT) as avg_value, - CAST(stdev(count_value) AS FLOAT) as stdev_value, - min(count_value) as min_value, - max(count_value) as max_value, - count_big(*) as total - FROM rawData - group by stratum_id -), -statsView (stratum_id, count_value, total, rn) as -( - select stratum_id, count_value, count_big(*) as total, row_number() over (order by count_value) as rn - FROM 
rawData - group by stratum_id, count_value -), -priorStats (stratum_id, count_value, total, accumulated) as -( - select s.stratum_id, s.count_value, s.total, sum(p.total) as accumulated - from statsView s - join statsView p on s.stratum_id = p.stratum_id and p.rn <= s.rn - group by s.stratum_id, s.count_value, s.total, s.rn -) -select 211 as analysis_id, - CAST(o.stratum_id AS VARCHAR(255)) AS stratum_id, - o.total as count_value, - o.min_value, - o.max_value, - o.avg_value, - o.stdev_value, - MIN(case when p.accumulated >= .50 * o.total then count_value else o.max_value end) as median_value, - MIN(case when p.accumulated >= .10 * o.total then count_value else o.max_value end) as p10_value, - MIN(case when p.accumulated >= .25 * o.total then count_value else o.max_value end) as p25_value, - MIN(case when p.accumulated >= .75 * o.total then count_value else o.max_value end) as p75_value, - MIN(case when p.accumulated >= .90 * o.total then count_value else o.max_value end) as p90_value -into #tempResults -from priorStats p -join overallStats o on p.stratum_id = o.stratum_id -GROUP BY o.stratum_id, o.total, o.min_value, o.max_value, o.avg_value, o.stdev_value -; - -insert into @results_database_schema.ACHILLES_results_dist (analysis_id, stratum_1, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value) -select analysis_id, stratum_id, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value -from #tempResults -; - -truncate table #tempResults; -drop table #tempResults; - ---} - - ---{212 IN (@list_of_analysis_ids)}?{ --- 212 Number of persons with at least one visit occurrence by calendar year by gender by age decile -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, stratum_2, stratum_3, count_value) -select 212 as analysis_id, - CAST(YEAR(visit_start_date) AS VARCHAR(255)), - CAST(p1.gender_concept_id AS VARCHAR(255)) as stratum_2, - CAST(floor((year(visit_start_date) - p1.year_of_birth)/10) AS VARCHAR(255)) as stratum_3, - COUNT_BIG(distinct p1.PERSON_ID) as count_value -from @cdm_database_schema.PERSON p1 -inner join -@cdm_database_schema.visit_occurrence vo1 -on p1.person_id = vo1.person_id -group by - YEAR(visit_start_date), - p1.gender_concept_id, - floor((year(visit_start_date) - p1.year_of_birth)/10) -; ---} - - ---{220 IN (@list_of_analysis_ids)}?{ --- 220 Number of visit occurrence records by visit occurrence start month -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, count_value) -select 220 as analysis_id, - CAST(YEAR(visit_start_date)*100 + month(visit_start_date) AS VARCHAR(255)) as stratum_1, - COUNT_BIG(PERSON_ID) as count_value -from -@cdm_database_schema.visit_occurrence vo1 -group by YEAR(visit_start_date)*100 + month(visit_start_date) -; ---} - - ---{221 IN (@list_of_analysis_ids)}?{ --- 221 Number of persons by visit start year -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, count_value) -select 221 as analysis_id, - CAST(YEAR(visit_start_date) AS VARCHAR(255)) as stratum_1, - COUNT_BIG(distinct PERSON_ID) as count_value -from -@cdm_database_schema.visit_occurrence vo1 -group by YEAR(visit_start_date) -; ---} - - - - - -/******************************************** - -ACHILLES Analyses on PROVIDER table - -*********************************************/ - - ---{300 IN (@list_of_analysis_ids)}?{ --- 300 Number of providers -insert into 
@results_database_schema.ACHILLES_results (analysis_id, count_value) -select 300 as analysis_id, COUNT_BIG(distinct provider_id) as count_value -from @cdm_database_schema.provider; ---} - - ---{301 IN (@list_of_analysis_ids)}?{ --- 301 Number of providers by specialty concept_id -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, count_value) -select 301 as analysis_id, CAST(specialty_concept_id AS VARCHAR(255)) as stratum_1, COUNT_BIG(distinct provider_id) as count_value -from @cdm_database_schema.provider -group by specialty_CONCEPT_ID; ---} - ---{302 IN (@list_of_analysis_ids)}?{ --- 302 Number of providers with invalid care site id -insert into @results_database_schema.ACHILLES_results (analysis_id, count_value) -select 302 as analysis_id, COUNT_BIG(provider_id) as count_value -from @cdm_database_schema.provider p1 - left join @cdm_database_schema.care_site cs1 - on p1.care_site_id = cs1.care_site_id -where p1.care_site_id is not null - and cs1.care_site_id is null -; ---} - - - -/******************************************** - -ACHILLES Analyses on CONDITION_OCCURRENCE table - -*********************************************/ - - ---{400 IN (@list_of_analysis_ids)}?{ --- 400 Number of persons with at least one condition occurrence, by condition_concept_id -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, count_value) -select 400 as analysis_id, - CAST(co1.condition_CONCEPT_ID AS VARCHAR(255)) as stratum_1, - COUNT_BIG(distinct co1.PERSON_ID) as count_value -from - @cdm_database_schema.condition_occurrence co1 -group by co1.condition_CONCEPT_ID -; ---} - - ---{401 IN (@list_of_analysis_ids)}?{ --- 401 Number of condition occurrence records, by condition_concept_id -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, count_value) -select 401 as analysis_id, - CAST(co1.condition_CONCEPT_ID AS VARCHAR(255)) as stratum_1, - COUNT_BIG(co1.PERSON_ID) as count_value -from - @cdm_database_schema.condition_occurrence co1 -group by co1.condition_CONCEPT_ID -; ---} - - - ---{402 IN (@list_of_analysis_ids)}?{ --- 402 Number of persons by condition occurrence start month, by condition_concept_id -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, stratum_2, count_value) -select 402 as analysis_id, - CAST(co1.condition_concept_id AS VARCHAR(255)) as stratum_1, - CAST(YEAR(condition_start_date)*100 + month(condition_start_date) AS VARCHAR(255)) as stratum_2, - COUNT_BIG(distinct PERSON_ID) as count_value -from -@cdm_database_schema.condition_occurrence co1 -group by co1.condition_concept_id, - YEAR(condition_start_date)*100 + month(condition_start_date) -; ---} - - - ---{403 IN (@list_of_analysis_ids)}?{ --- 403 Number of distinct condition occurrence concepts per person -with rawData(person_id, count_value) as -( - select person_id, COUNT_BIG(distinct condition_concept_id) as num_conditions - from @cdm_database_schema.condition_occurrence - group by person_id -), -overallStats (avg_value, stdev_value, min_value, max_value, total) as -( - select CAST(avg(1.0 * count_value) AS FLOAT) as avg_value, - CAST(stdev(count_value) AS FLOAT) as stdev_value, - min(count_value) as min_value, - max(count_value) as max_value, - count_big(*) as total - from rawData -), -statsView (count_value, total, rn) as -( - select count_value, - count_big(*) as total, - row_number() over (order by count_value) as rn - FROM rawData - group by count_value -), -priorStats (count_value, total, accumulated) as -( - select 
s.count_value, s.total, sum(p.total) as accumulated - from statsView s - join statsView p on p.rn <= s.rn - group by s.count_value, s.total, s.rn -) -select 403 as analysis_id, - o.total as count_value, - o.min_value, - o.max_value, - o.avg_value, - o.stdev_value, - MIN(case when p.accumulated >= .50 * o.total then count_value else o.max_value end) as median_value, - MIN(case when p.accumulated >= .10 * o.total then count_value else o.max_value end) as p10_value, - MIN(case when p.accumulated >= .25 * o.total then count_value else o.max_value end) as p25_value, - MIN(case when p.accumulated >= .75 * o.total then count_value else o.max_value end) as p75_value, - MIN(case when p.accumulated >= .90 * o.total then count_value else o.max_value end) as p90_value -into #tempResults -from priorStats p -CROSS JOIN overallStats o -GROUP BY o.total, o.min_value, o.max_value, o.avg_value, o.stdev_value -; - -insert into @results_database_schema.ACHILLES_results_dist (analysis_id, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value) -select analysis_id, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value -from #tempResults -; - -truncate table #tempResults; -drop table #tempResults; - ---} - - - ---{404 IN (@list_of_analysis_ids)}?{ --- 404 Number of persons with at least one condition occurrence, by condition_concept_id by calendar year by gender by age decile -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, stratum_2, stratum_3, stratum_4, count_value) -select 404 as analysis_id, - CAST(co1.condition_concept_id AS VARCHAR(255)) as stratum_1, - CAST(YEAR(condition_start_date) AS VARCHAR(255)) as stratum_2, - CAST(p1.gender_concept_id AS VARCHAR(255)) as stratum_3, - CAST(floor((year(condition_start_date) - p1.year_of_birth)/10) AS VARCHAR(255)) as stratum_4, - COUNT_BIG(distinct p1.PERSON_ID) as count_value -from @cdm_database_schema.PERSON p1 -inner join -@cdm_database_schema.condition_occurrence co1 -on p1.person_id = co1.person_id -group by co1.condition_concept_id, - YEAR(condition_start_date), - p1.gender_concept_id, - floor((year(condition_start_date) - p1.year_of_birth)/10) -; ---} - ---{405 IN (@list_of_analysis_ids)}?{ --- 405 Number of condition occurrence records, by condition_concept_id by condition_type_concept_id -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, stratum_2, count_value) -select 405 as analysis_id, - CAST(co1.condition_CONCEPT_ID AS VARCHAR(255)) as stratum_1, - CAST(co1.condition_type_concept_id AS VARCHAR(255)) as stratum_2, - COUNT_BIG(co1.PERSON_ID) as count_value -from - @cdm_database_schema.condition_occurrence co1 -group by co1.condition_CONCEPT_ID, - co1.condition_type_concept_id -; ---} - - - ---{406 IN (@list_of_analysis_ids)}?{ --- 406 Distribution of age by condition_concept_id -select co1.condition_concept_id as subject_id, - p1.gender_concept_id, - (co1.condition_start_year - p1.year_of_birth) as count_value -INTO #rawData_406 -from @cdm_database_schema.PERSON p1 -inner join -( - select person_id, condition_concept_id, min(year(condition_start_date)) as condition_start_year - from @cdm_database_schema.condition_occurrence - group by person_id, condition_concept_id -) co1 on p1.person_id = co1.person_id -; - -with overallStats (stratum1_id, stratum2_id, avg_value, stdev_value, min_value, max_value, total) as -( - select subject_id as stratum1_id, - gender_concept_id as 
stratum2_id, - CAST(avg(1.0 * count_value) AS FLOAT) as avg_value, - CAST(stdev(count_value) AS FLOAT) as stdev_value, - min(count_value) as min_value, - max(count_value) as max_value, - count_big(*) as total - FROM #rawData_406 - group by subject_id, gender_concept_id -), -statsView (stratum1_id, stratum2_id, count_value, total, rn) as -( - select subject_id as stratum1_id, gender_concept_id as stratum2_id, count_value, count_big(*) as total, row_number() over (partition by subject_id, gender_concept_id order by count_value) as rn - FROM #rawData_406 - group by subject_id, gender_concept_id, count_value -), -priorStats (stratum1_id, stratum2_id, count_value, total, accumulated) as -( - select s.stratum1_id, s.stratum2_id, s.count_value, s.total, sum(p.total) as accumulated - from statsView s - join statsView p on s.stratum1_id = p.stratum1_id and s.stratum2_id = p.stratum2_id and p.rn <= s.rn - group by s.stratum1_id, s.stratum2_id, s.count_value, s.total, s.rn -) -select 406 as analysis_id, - CAST(o.stratum1_id AS VARCHAR(255)) AS stratum1_id, - CAST(o.stratum2_id AS VARCHAR(255)) AS stratum2_id, - o.total as count_value, - o.min_value, - o.max_value, - o.avg_value, - o.stdev_value, - MIN(case when p.accumulated >= .50 * o.total then count_value else o.max_value end) as median_value, - MIN(case when p.accumulated >= .10 * o.total then count_value else o.max_value end) as p10_value, - MIN(case when p.accumulated >= .25 * o.total then count_value else o.max_value end) as p25_value, - MIN(case when p.accumulated >= .75 * o.total then count_value else o.max_value end) as p75_value, - MIN(case when p.accumulated >= .90 * o.total then count_value else o.max_value end) as p90_value -INTO #tempResults -from priorStats p -join overallStats o on p.stratum1_id = o.stratum1_id and p.stratum2_id = o.stratum2_id -GROUP BY o.stratum1_id, o.stratum2_id, o.total, o.min_value, o.max_value, o.avg_value, o.stdev_value -; - -insert into @results_database_schema.ACHILLES_results_dist (analysis_id, stratum_1, stratum_2, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value) -select analysis_id, stratum1_id, stratum2_id, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value -from #tempResults -; - -truncate table #tempResults; -drop table #tempResults; - -truncate Table #rawData_406; -drop table #rawData_406; - ---} - - ---{409 IN (@list_of_analysis_ids)}?{ --- 409 Number of condition occurrence records with invalid person_id -insert into @results_database_schema.ACHILLES_results (analysis_id, count_value) -select 409 as analysis_id, - COUNT_BIG(co1.PERSON_ID) as count_value -from - @cdm_database_schema.condition_occurrence co1 - left join @cdm_database_schema.PERSON p1 - on p1.person_id = co1.person_id -where p1.person_id is null -; ---} - - ---{410 IN (@list_of_analysis_ids)}?{ --- 410 Number of condition occurrence records outside valid observation period -insert into @results_database_schema.ACHILLES_results (analysis_id, count_value) -select 410 as analysis_id, - COUNT_BIG(co1.PERSON_ID) as count_value -from - @cdm_database_schema.condition_occurrence co1 - left join @cdm_database_schema.observation_period op1 - on op1.person_id = co1.person_id - and co1.condition_start_date >= op1.observation_period_start_date - and co1.condition_start_date <= op1.observation_period_end_date -where op1.person_id is null -; ---} - - ---{411 IN (@list_of_analysis_ids)}?{ --- 411 Number of condition 
occurrence records with end date < start date -insert into @results_database_schema.ACHILLES_results (analysis_id, count_value) -select 411 as analysis_id, - COUNT_BIG(co1.PERSON_ID) as count_value -from - @cdm_database_schema.condition_occurrence co1 -where co1.condition_end_date < co1.condition_start_date -; ---} - - ---{412 IN (@list_of_analysis_ids)}?{ --- 412 Number of condition occurrence records with invalid provider_id -insert into @results_database_schema.ACHILLES_results (analysis_id, count_value) -select 412 as analysis_id, - COUNT_BIG(co1.PERSON_ID) as count_value -from - @cdm_database_schema.condition_occurrence co1 - left join @cdm_database_schema.provider p1 - on p1.provider_id = co1.provider_id -where co1.provider_id is not null - and p1.provider_id is null -; ---} - ---{413 IN (@list_of_analysis_ids)}?{ --- 413 Number of condition occurrence records with invalid visit_id -insert into @results_database_schema.ACHILLES_results (analysis_id, count_value) -select 413 as analysis_id, - COUNT_BIG(co1.PERSON_ID) as count_value -from - @cdm_database_schema.condition_occurrence co1 - left join @cdm_database_schema.visit_occurrence vo1 - on co1.visit_occurrence_id = vo1.visit_occurrence_id -where co1.visit_occurrence_id is not null - and vo1.visit_occurrence_id is null -; ---} - ---{420 IN (@list_of_analysis_ids)}?{ --- 420 Number of condition occurrence records by condition occurrence start month -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, count_value) -select 420 as analysis_id, - CAST(YEAR(condition_start_date)*100 + month(condition_start_date) AS VARCHAR(255)) as stratum_1, - COUNT_BIG(PERSON_ID) as count_value -from -@cdm_database_schema.condition_occurrence co1 -group by YEAR(condition_start_date)*100 + month(condition_start_date) -; ---} - - - -/******************************************** - -ACHILLES Analyses on DEATH table - -*********************************************/ - - - ---{500 IN (@list_of_analysis_ids)}?{ --- 500 Number of persons with death, by cause_concept_id -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, count_value) -select 500 as analysis_id, - CAST(d1.cause_concept_id AS VARCHAR(255)) as stratum_1, - COUNT_BIG(distinct d1.PERSON_ID) as count_value -from - @cdm_database_schema.death d1 -group by d1.cause_concept_id -; ---} - - ---{501 IN (@list_of_analysis_ids)}?{ --- 501 Number of records of death, by cause_concept_id -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, count_value) -select 501 as analysis_id, - CAST(d1.cause_concept_id AS VARCHAR(255)) as stratum_1, - COUNT_BIG(d1.PERSON_ID) as count_value -from - @cdm_database_schema.death d1 -group by d1.cause_concept_id -; ---} - - - ---{502 IN (@list_of_analysis_ids)}?{ --- 502 Number of persons by death month -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, count_value) -select 502 as analysis_id, - CAST(YEAR(death_date)*100 + month(death_date) AS VARCHAR(255)) as stratum_1, - COUNT_BIG(distinct PERSON_ID) as count_value -from -@cdm_database_schema.death d1 -group by YEAR(death_date)*100 + month(death_date) -; ---} - - - ---{504 IN (@list_of_analysis_ids)}?{ --- 504 Number of persons with a death, by calendar year by gender by age decile -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, stratum_2, stratum_3, count_value) -select 504 as analysis_id, - CAST(YEAR(death_date) AS VARCHAR(255)) as stratum_1, - 
CAST(p1.gender_concept_id AS VARCHAR(255)) as stratum_2, - CAST(floor((year(death_date) - p1.year_of_birth)/10) AS VARCHAR(255)) as stratum_3, - COUNT_BIG(distinct p1.PERSON_ID) as count_value -from @cdm_database_schema.PERSON p1 -inner join -@cdm_database_schema.death d1 -on p1.person_id = d1.person_id -group by YEAR(death_date), - p1.gender_concept_id, - floor((year(death_date) - p1.year_of_birth)/10) -; ---} - ---{505 IN (@list_of_analysis_ids)}?{ --- 505 Number of death records, by death_type_concept_id -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, count_value) -select 505 as analysis_id, - CAST(death_type_concept_id AS VARCHAR(255)) as stratum_1, - COUNT_BIG(PERSON_ID) as count_value -from - @cdm_database_schema.death d1 -group by death_type_concept_id -; ---} - - - ---{506 IN (@list_of_analysis_ids)}?{ --- 506 Distribution of age at death by gender_concept_id - -with rawData(stratum_id, count_value) as -( - select p1.gender_concept_id, - d1.death_year - p1.year_of_birth as count_value - from @cdm_database_schema.PERSON p1 - inner join - (select person_id, min(year(death_date)) as death_year - from @cdm_database_schema.death - group by person_id - ) d1 - on p1.person_id = d1.person_id -), -overallStats (stratum_id, avg_value, stdev_value, min_value, max_value, total) as -( - select stratum_id, - CAST(avg(1.0 * count_value) AS FLOAT) as avg_value, - CAST(stdev(count_value) AS FLOAT) as stdev_value, - min(count_value) as min_value, - max(count_value) as max_value, - count_big(*) as total - FROM rawData - group by stratum_id -), -statsView (stratum_id, count_value, total, rn) as -( - select stratum_id, count_value, count_big(*) as total, row_number() over (order by count_value) as rn - FROM rawData - group by stratum_id, count_value -), -priorStats (stratum_id, count_value, total, accumulated) as -( - select s.stratum_id, s.count_value, s.total, sum(p.total) as accumulated - from statsView s - join statsView p on s.stratum_id = p.stratum_id and p.rn <= s.rn - group by s.stratum_id, s.count_value, s.total, s.rn -) -select 506 as analysis_id, - CAST(o.stratum_id AS VARCHAR(255)) AS stratum_id, - o.total as count_value, - o.min_value, - o.max_value, - o.avg_value, - o.stdev_value, - MIN(case when p.accumulated >= .50 * o.total then count_value else o.max_value end) as median_value, - MIN(case when p.accumulated >= .10 * o.total then count_value else o.max_value end) as p10_value, - MIN(case when p.accumulated >= .25 * o.total then count_value else o.max_value end) as p25_value, - MIN(case when p.accumulated >= .75 * o.total then count_value else o.max_value end) as p75_value, - MIN(case when p.accumulated >= .90 * o.total then count_value else o.max_value end) as p90_value -into #tempResults -from priorStats p -join overallStats o on p.stratum_id = o.stratum_id -GROUP BY o.stratum_id, o.total, o.min_value, o.max_value, o.avg_value, o.stdev_value -; - -insert into @results_database_schema.ACHILLES_results_dist (analysis_id, stratum_1, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value) -select analysis_id, stratum_id, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value -from #tempResults -; - -truncate table #tempResults; - -drop table #tempResults; ---} - - - ---{509 IN (@list_of_analysis_ids)}?{ --- 509 Number of death records with invalid person_id -insert into @results_database_schema.ACHILLES_results (analysis_id, count_value) 
-select 509 as analysis_id, - COUNT_BIG(d1.PERSON_ID) as count_value -from - @cdm_database_schema.death d1 - left join @cdm_database_schema.person p1 - on d1.person_id = p1.person_id -where p1.person_id is null -; ---} - - - ---{510 IN (@list_of_analysis_ids)}?{ --- 510 Number of death records outside valid observation period -insert into @results_database_schema.ACHILLES_results (analysis_id, count_value) -select 510 as analysis_id, - COUNT_BIG(d1.PERSON_ID) as count_value -from - @cdm_database_schema.death d1 - left join @cdm_database_schema.observation_period op1 - on d1.person_id = op1.person_id - and d1.death_date >= op1.observation_period_start_date - and d1.death_date <= op1.observation_period_end_date -where op1.person_id is null -; ---} - - ---{511 IN (@list_of_analysis_ids)}?{ --- 511 Distribution of time from death to last condition -insert into @results_database_schema.ACHILLES_results_dist (analysis_id, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value) -select 511 as analysis_id, - COUNT_BIG(count_value) as count_value, - min(count_value) as min_value, - max(count_value) as max_value, - CAST(avg(1.0*count_value) AS FLOAT) as avg_value, - CAST(stdev(count_value) AS FLOAT) as stdev_value, - max(case when p1<=0.50 then count_value else -9999 end) as median_value, - max(case when p1<=0.10 then count_value else -9999 end) as p10_value, - max(case when p1<=0.25 then count_value else -9999 end) as p25_value, - max(case when p1<=0.75 then count_value else -9999 end) as p75_value, - max(case when p1<=0.90 then count_value else -9999 end) as p90_value -from -( -select datediff(dd,d1.death_date, t0.max_date) as count_value, - 1.0*(row_number() over (order by datediff(dd,d1.death_date, t0.max_date)))/(COUNT_BIG(*) over () + 1) as p1 -from @cdm_database_schema.death d1 - inner join - ( - select person_id, max(condition_start_date) as max_date - from @cdm_database_schema.condition_occurrence - group by person_id - ) t0 on d1.person_id = t0.person_id -) t1 -; ---} - - ---{512 IN (@list_of_analysis_ids)}?{ --- 512 Distribution of time from death to last drug -with rawData(count_value) as -( - select datediff(dd,d1.death_date, t0.max_date) as count_value - from @cdm_database_schema.death d1 - inner join - ( - select person_id, max(drug_exposure_start_date) as max_date - from @cdm_database_schema.drug_exposure - group by person_id - ) t0 - on d1.person_id = t0.person_id -), -overallStats (avg_value, stdev_value, min_value, max_value, total) as -( - select CAST(avg(1.0 * count_value) AS FLOAT) as avg_value, - CAST(stdev(count_value) AS FLOAT) as stdev_value, - min(count_value) as min_value, - max(count_value) as max_value, - count_big(*) as total - from rawData -), -statsView (count_value, total, rn) as -( - select count_value, - count_big(*) as total, - row_number() over (order by count_value) as rn - FROM rawData - group by count_value -), -priorStats (count_value, total, accumulated) as -( - select s.count_value, s.total, sum(p.total) as accumulated - from statsView s - join statsView p on p.rn <= s.rn - group by s.count_value, s.total, s.rn -) -select 512 as analysis_id, - o.total as count_value, - o.min_value, - o.max_value, - o.avg_value, - o.stdev_value, - MIN(case when p.accumulated >= .50 * o.total then count_value else o.max_value end) as median_value, - MIN(case when p.accumulated >= .10 * o.total then count_value else o.max_value end) as p10_value, - MIN(case when p.accumulated >= .25 * o.total then count_value else 
o.max_value end) as p25_value, - MIN(case when p.accumulated >= .75 * o.total then count_value else o.max_value end) as p75_value, - MIN(case when p.accumulated >= .90 * o.total then count_value else o.max_value end) as p90_value -into #tempResults -from priorStats p -CROSS JOIN overallStats o -GROUP BY o.total, o.min_value, o.max_value, o.avg_value, o.stdev_value -; - -insert into @results_database_schema.ACHILLES_results_dist (analysis_id, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value) -select analysis_id, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value -FROM #tempResults -; - -truncate table #tempResults; - -drop table #tempResults; - - ---} - - ---{513 IN (@list_of_analysis_ids)}?{ --- 513 Distribution of time from death to last visit -with rawData(count_value) as -( - select datediff(dd,d1.death_date, t0.max_date) as count_value - from @cdm_database_schema.death d1 - inner join - ( - select person_id, max(visit_start_date) as max_date - from @cdm_database_schema.visit_occurrence - group by person_id - ) t0 - on d1.person_id = t0.person_id -), -overallStats (avg_value, stdev_value, min_value, max_value, total) as -( - select CAST(avg(1.0 * count_value) AS FLOAT) as avg_value, - CAST(stdev(count_value) AS FLOAT) as stdev_value, - min(count_value) as min_value, - max(count_value) as max_value, - count_big(*) as total - from rawData -), -statsView (count_value, total, rn) as -( - select count_value, - count_big(*) as total, - row_number() over (order by count_value) as rn - FROM rawData - group by count_value -), -priorStats (count_value, total, accumulated) as -( - select s.count_value, s.total, sum(p.total) as accumulated - from statsView s - join statsView p on p.rn <= s.rn - group by s.count_value, s.total, s.rn -) -select 513 as analysis_id, - o.total as count_value, - o.min_value, - o.max_value, - o.avg_value, - o.stdev_value, - MIN(case when p.accumulated >= .50 * o.total then count_value else o.max_value end) as median_value, - MIN(case when p.accumulated >= .10 * o.total then count_value else o.max_value end) as p10_value, - MIN(case when p.accumulated >= .25 * o.total then count_value else o.max_value end) as p25_value, - MIN(case when p.accumulated >= .75 * o.total then count_value else o.max_value end) as p75_value, - MIN(case when p.accumulated >= .90 * o.total then count_value else o.max_value end) as p90_value -into #tempResults -from priorStats p -CROSS JOIN overallStats o -GROUP BY o.total, o.min_value, o.max_value, o.avg_value, o.stdev_value -; - -insert into @results_database_schema.ACHILLES_results_dist (analysis_id, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value) -select analysis_id, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value -from #tempResults -; - -truncate table #tempResults; - -drop table #tempResults; - ---} - - ---{514 IN (@list_of_analysis_ids)}?{ --- 514 Distribution of time from death to last procedure -with rawData(count_value) as -( - select datediff(dd,d1.death_date, t0.max_date) as count_value - from @cdm_database_schema.death d1 - inner join - ( - select person_id, max(procedure_date) as max_date - from @cdm_database_schema.procedure_occurrence - group by person_id - ) t0 - on d1.person_id = t0.person_id -), -overallStats (avg_value, stdev_value, min_value, max_value, total) as 
-( - select CAST(avg(1.0 * count_value) AS FLOAT) as avg_value, - CAST(stdev(count_value) AS FLOAT) as stdev_value, - min(count_value) as min_value, - max(count_value) as max_value, - count_big(*) as total - from rawData -), -statsView (count_value, total, rn) as -( - select count_value, - count_big(*) as total, - row_number() over (order by count_value) as rn - FROM rawData - group by count_value -), -priorStats (count_value, total, accumulated) as -( - select s.count_value, s.total, sum(p.total) as accumulated - from statsView s - join statsView p on p.rn <= s.rn - group by s.count_value, s.total, s.rn -) -select 514 as analysis_id, - o.total as count_value, - o.min_value, - o.max_value, - o.avg_value, - o.stdev_value, - MIN(case when p.accumulated >= .50 * o.total then count_value else o.max_value end) as median_value, - MIN(case when p.accumulated >= .10 * o.total then count_value else o.max_value end) as p10_value, - MIN(case when p.accumulated >= .25 * o.total then count_value else o.max_value end) as p25_value, - MIN(case when p.accumulated >= .75 * o.total then count_value else o.max_value end) as p75_value, - MIN(case when p.accumulated >= .90 * o.total then count_value else o.max_value end) as p90_value -into #tempResults -from priorStats p -CROSS JOIN overallStats o -GROUP BY o.total, o.min_value, o.max_value, o.avg_value, o.stdev_value -; - -insert into @results_database_schema.ACHILLES_results_dist (analysis_id, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value) -select analysis_id, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value -from #tempResults -; - -truncate table #tempResults; - -drop table #tempResults; - ---} - - ---{515 IN (@list_of_analysis_ids)}?{ --- 515 Distribution of time from death to last observation -with rawData(count_value) as -( - select datediff(dd,d1.death_date, t0.max_date) as count_value - from @cdm_database_schema.death d1 - inner join - ( - select person_id, max(observation_date) as max_date - from @cdm_database_schema.observation - group by person_id - ) t0 - on d1.person_id = t0.person_id -), -overallStats (avg_value, stdev_value, min_value, max_value, total) as -( - select CAST(avg(1.0 * count_value) AS FLOAT) as avg_value, - CAST(stdev(count_value) AS FLOAT) as stdev_value, - min(count_value) as min_value, - max(count_value) as max_value, - count_big(*) as total - from rawData -), -statsView (count_value, total, rn) as -( - select count_value, - count_big(*) as total, - row_number() over (order by count_value) as rn - FROM rawData - group by count_value -), -priorStats (count_value, total, accumulated) as -( - select s.count_value, s.total, sum(p.total) as accumulated - from statsView s - join statsView p on p.rn <= s.rn - group by s.count_value, s.total, s.rn -) -select 515 as analysis_id, - o.total as count_value, - o.min_value, - o.max_value, - o.avg_value, - o.stdev_value, - MIN(case when p.accumulated >= .50 * o.total then count_value else o.max_value end) as median_value, - MIN(case when p.accumulated >= .10 * o.total then count_value else o.max_value end) as p10_value, - MIN(case when p.accumulated >= .25 * o.total then count_value else o.max_value end) as p25_value, - MIN(case when p.accumulated >= .75 * o.total then count_value else o.max_value end) as p75_value, - MIN(case when p.accumulated >= .90 * o.total then count_value else o.max_value end) as p90_value -into #tempResults -from priorStats p -CROSS 
JOIN overallStats o -GROUP BY o.total, o.min_value, o.max_value, o.avg_value, o.stdev_value -; - -insert into @results_database_schema.ACHILLES_results_dist (analysis_id, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value) -select analysis_id, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value -from #tempResults -; - -truncate table #tempResults; -drop table #tempResults; - - ---} - - - -/******************************************** - -ACHILLES Analyses on PROCEDURE_OCCURRENCE table - -*********************************************/ - - - ---{600 IN (@list_of_analysis_ids)}?{ --- 600 Number of persons with at least one procedure occurrence, by procedure_concept_id -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, count_value) -select 600 as analysis_id, - CAST(po1.procedure_CONCEPT_ID AS VARCHAR(255)) as stratum_1, - COUNT_BIG(distinct po1.PERSON_ID) as count_value -from - @cdm_database_schema.procedure_occurrence po1 -group by po1.procedure_CONCEPT_ID -; ---} - - ---{601 IN (@list_of_analysis_ids)}?{ --- 601 Number of procedure occurrence records, by procedure_concept_id -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, count_value) -select 601 as analysis_id, - CAST(po1.procedure_CONCEPT_ID AS VARCHAR(255)) as stratum_1, - COUNT_BIG(po1.PERSON_ID) as count_value -from - @cdm_database_schema.procedure_occurrence po1 -group by po1.procedure_CONCEPT_ID -; ---} - - - ---{602 IN (@list_of_analysis_ids)}?{ --- 602 Number of persons by procedure occurrence start month, by procedure_concept_id -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, stratum_2, count_value) -select 602 as analysis_id, - CAST(po1.procedure_concept_id AS VARCHAR(255)) as stratum_1, - CAST(YEAR(procedure_date)*100 + month(procedure_date) AS VARCHAR(255)) as stratum_2, - COUNT_BIG(distinct PERSON_ID) as count_value -from -@cdm_database_schema.procedure_occurrence po1 -group by po1.procedure_concept_id, - YEAR(procedure_date)*100 + month(procedure_date) -; ---} - - - ---{603 IN (@list_of_analysis_ids)}?{ --- 603 Number of distinct procedure occurrence concepts per person -with rawData(count_value) as -( - select COUNT_BIG(distinct po.procedure_concept_id) as num_procedures - from @cdm_database_schema.procedure_occurrence po - group by po.person_id -), -overallStats (avg_value, stdev_value, min_value, max_value, total) as -( - select CAST(avg(1.0 * count_value) AS FLOAT) as avg_value, - CAST(stdev(count_value) AS FLOAT) as stdev_value, - min(count_value) as min_value, - max(count_value) as max_value, - count_big(*) as total - from rawData -), -statsView (count_value, total, rn) as -( - select count_value, - count_big(*) as total, - row_number() over (order by count_value) as rn - FROM rawData - group by count_value -), -priorStats (count_value, total, accumulated) as -( - select s.count_value, s.total, sum(p.total) as accumulated - from statsView s - join statsView p on p.rn <= s.rn - group by s.count_value, s.total, s.rn -) -select 603 as analysis_id, - o.total as count_value, - o.min_value, - o.max_value, - o.avg_value, - o.stdev_value, - MIN(case when p.accumulated >= .50 * o.total then count_value else o.max_value end) as median_value, - MIN(case when p.accumulated >= .10 * o.total then count_value else o.max_value end) as p10_value, - MIN(case when p.accumulated >= .25 * o.total then count_value else 
o.max_value end) as p25_value, - MIN(case when p.accumulated >= .75 * o.total then count_value else o.max_value end) as p75_value, - MIN(case when p.accumulated >= .90 * o.total then count_value else o.max_value end) as p90_value -into #tempResults -from priorStats p -CROSS JOIN overallStats o -GROUP BY o.total, o.min_value, o.max_value, o.avg_value, o.stdev_value -; - -insert into @results_database_schema.ACHILLES_results_dist (analysis_id, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value) -select analysis_id, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value -from #tempResults -; - -truncate table #tempResults; -drop table #tempResults; - - ---} - - - ---{604 IN (@list_of_analysis_ids)}?{ --- 604 Number of persons with at least one procedure occurrence, by procedure_concept_id by calendar year by gender by age decile -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, stratum_2, stratum_3, stratum_4, count_value) -select 604 as analysis_id, - CAST(po1.procedure_concept_id AS VARCHAR(255)) as stratum_1, - CAST(YEAR(procedure_date) AS VARCHAR(255)) as stratum_2, - CAST(p1.gender_concept_id AS VARCHAR(255)) as stratum_3, - CAST(floor((year(procedure_date) - p1.year_of_birth)/10) AS VARCHAR(255)) as stratum_4, - COUNT_BIG(distinct p1.PERSON_ID) as count_value -from @cdm_database_schema.PERSON p1 -inner join -@cdm_database_schema.procedure_occurrence po1 -on p1.person_id = po1.person_id -group by po1.procedure_concept_id, - YEAR(procedure_date), - p1.gender_concept_id, - floor((year(procedure_date) - p1.year_of_birth)/10) -; ---} - ---{605 IN (@list_of_analysis_ids)}?{ --- 605 Number of procedure occurrence records, by procedure_concept_id by procedure_type_concept_id -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, stratum_2, count_value) -select 605 as analysis_id, - CAST(po1.procedure_CONCEPT_ID AS VARCHAR(255)) as stratum_1, - CAST(po1.procedure_type_concept_id AS VARCHAR(255)) as stratum_2, - COUNT_BIG(po1.PERSON_ID) as count_value -from - @cdm_database_schema.procedure_occurrence po1 -group by po1.procedure_CONCEPT_ID, - po1.procedure_type_concept_id -; ---} - - - ---{606 IN (@list_of_analysis_ids)}?{ --- 606 Distribution of age by procedure_concept_id -select po1.procedure_concept_id as subject_id, - p1.gender_concept_id, - po1.procedure_start_year - p1.year_of_birth as count_value -INTO #rawData_606 -from @cdm_database_schema.PERSON p1 -inner join -( - select person_id, procedure_concept_id, min(year(procedure_date)) as procedure_start_year - from @cdm_database_schema.procedure_occurrence - group by person_id, procedure_concept_id -) po1 on p1.person_id = po1.person_id -; - -with overallStats (stratum1_id, stratum2_id, avg_value, stdev_value, min_value, max_value, total) as -( - select subject_id as stratum1_id, - gender_concept_id as stratum2_id, - CAST(avg(1.0 * count_value) AS FLOAT) as avg_value, - CAST(stdev(count_value) AS FLOAT) as stdev_value, - min(count_value) as min_value, - max(count_value) as max_value, - count_big(*) as total - FROM #rawData_606 - group by subject_id, gender_concept_id -), -statsView (stratum1_id, stratum2_id, count_value, total, rn) as -( - select subject_id as stratum1_id, gender_concept_id as stratum2_id, count_value, count_big(*) as total, row_number() over (partition by subject_id, gender_concept_id order by count_value) as rn - FROM #rawData_606 - group by 
subject_id, gender_concept_id, count_value -), -priorStats (stratum1_id, stratum2_id, count_value, total, accumulated) as -( - select s.stratum1_id, s.stratum2_id, s.count_value, s.total, sum(p.total) as accumulated - from statsView s - join statsView p on s.stratum1_id = p.stratum1_id and s.stratum2_id = p.stratum2_id and p.rn <= s.rn - group by s.stratum1_id, s.stratum2_id, s.count_value, s.total, s.rn -) -select 606 as analysis_id, - CAST(o.stratum1_id AS VARCHAR(255)) AS stratum1_id, - CAST(o.stratum2_id AS VARCHAR(255)) AS stratum2_id, - o.total as count_value, - o.min_value, - o.max_value, - o.avg_value, - o.stdev_value, - MIN(case when p.accumulated >= .50 * o.total then count_value else o.max_value end) as median_value, - MIN(case when p.accumulated >= .10 * o.total then count_value else o.max_value end) as p10_value, - MIN(case when p.accumulated >= .25 * o.total then count_value else o.max_value end) as p25_value, - MIN(case when p.accumulated >= .75 * o.total then count_value else o.max_value end) as p75_value, - MIN(case when p.accumulated >= .90 * o.total then count_value else o.max_value end) as p90_value -into #tempResults -from priorStats p -join overallStats o on p.stratum1_id = o.stratum1_id and p.stratum2_id = o.stratum2_id -GROUP BY o.stratum1_id, o.stratum2_id, o.total, o.min_value, o.max_value, o.avg_value, o.stdev_value -; - -insert into @results_database_schema.ACHILLES_results_dist (analysis_id, stratum_1, stratum_2, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value) -select analysis_id, stratum1_id, stratum2_id, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value -from #tempResults -; - -truncate table #tempResults; -drop table #tempResults; -truncate table #rawData_606; -drop table #rawData_606; - ---} - ---{609 IN (@list_of_analysis_ids)}?{ --- 609 Number of procedure occurrence records with invalid person_id -insert into @results_database_schema.ACHILLES_results (analysis_id, count_value) -select 609 as analysis_id, - COUNT_BIG(po1.PERSON_ID) as count_value -from - @cdm_database_schema.procedure_occurrence po1 - left join @cdm_database_schema.PERSON p1 - on p1.person_id = po1.person_id -where p1.person_id is null -; ---} - - ---{610 IN (@list_of_analysis_ids)}?{ --- 610 Number of procedure occurrence records outside valid observation period -insert into @results_database_schema.ACHILLES_results (analysis_id, count_value) -select 610 as analysis_id, - COUNT_BIG(po1.PERSON_ID) as count_value -from - @cdm_database_schema.procedure_occurrence po1 - left join @cdm_database_schema.observation_period op1 - on op1.person_id = po1.person_id - and po1.procedure_date >= op1.observation_period_start_date - and po1.procedure_date <= op1.observation_period_end_date -where op1.person_id is null -; ---} - - - ---{612 IN (@list_of_analysis_ids)}?{ --- 612 Number of procedure occurrence records with invalid provider_id -insert into @results_database_schema.ACHILLES_results (analysis_id, count_value) -select 612 as analysis_id, - COUNT_BIG(po1.PERSON_ID) as count_value -from - @cdm_database_schema.procedure_occurrence po1 - left join @cdm_database_schema.provider p1 - on p1.provider_id = po1.provider_id -where po1.provider_id is not null - and p1.provider_id is null -; ---} - ---{613 IN (@list_of_analysis_ids)}?{ --- 613 Number of procedure occurrence records with invalid visit_id -insert into @results_database_schema.ACHILLES_results (analysis_id, 
count_value) -select 613 as analysis_id, - COUNT_BIG(po1.PERSON_ID) as count_value -from - @cdm_database_schema.procedure_occurrence po1 - left join @cdm_database_schema.visit_occurrence vo1 - on po1.visit_occurrence_id = vo1.visit_occurrence_id -where po1.visit_occurrence_id is not null - and vo1.visit_occurrence_id is null -; ---} - - ---{620 IN (@list_of_analysis_ids)}?{ --- 620 Number of procedure occurrence records by condition occurrence start month -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, count_value) -select 620 as analysis_id, - CAST(YEAR(procedure_date)*100 + month(procedure_date) AS VARCHAR(255)) as stratum_1, - COUNT_BIG(PERSON_ID) as count_value -from -@cdm_database_schema.procedure_occurrence po1 -group by YEAR(procedure_date)*100 + month(procedure_date) -; ---} - - ---{691 IN (@list_of_analysis_ids)}?{ --- 691 Number of total persons that have at least x procedures -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, stratum_2, count_value) -select - 691 as analysis_id, - CAST(procedure_concept_id AS VARCHAR(255)) as stratum_1, - CAST(prc_cnt AS VARCHAR(255)) as stratum_2, - sum(count(person_id)) over (partition by procedure_concept_id order by prc_cnt desc) as count_value -from ( - select - p.procedure_concept_id, - count(p.procedure_occurrence_id) as prc_cnt, - p.person_id - from @cdm_database_schema.procedure_occurrence p - group by p.person_id, p.procedure_concept_id -) cnt_q -group by cnt_q.procedure_concept_id, cnt_q.prc_cnt; ---} - -/******************************************** - -ACHILLES Analyses on DRUG_EXPOSURE table - -*********************************************/ - - - - ---{700 IN (@list_of_analysis_ids)}?{ --- 700 Number of persons with at least one drug occurrence, by drug_concept_id -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, count_value) -select 700 as analysis_id, - CAST(de1.drug_CONCEPT_ID AS VARCHAR(255)) as stratum_1, - COUNT_BIG(distinct de1.PERSON_ID) as count_value -from - @cdm_database_schema.drug_exposure de1 -group by de1.drug_CONCEPT_ID -; ---} - - ---{701 IN (@list_of_analysis_ids)}?{ --- 701 Number of drug occurrence records, by drug_concept_id -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, count_value) -select 701 as analysis_id, - CAST(de1.drug_CONCEPT_ID AS VARCHAR(255)) as stratum_1, - COUNT_BIG(de1.PERSON_ID) as count_value -from - @cdm_database_schema.drug_exposure de1 -group by de1.drug_CONCEPT_ID -; ---} - - - ---{702 IN (@list_of_analysis_ids)}?{ --- 702 Number of persons by drug occurrence start month, by drug_concept_id -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, stratum_2, count_value) -select 702 as analysis_id, - CAST(de1.drug_concept_id AS VARCHAR(255)) as stratum_1, - CAST(YEAR(drug_exposure_start_date)*100 + month(drug_exposure_start_date) AS VARCHAR(255)) as stratum_2, - COUNT_BIG(distinct PERSON_ID) as count_value -from -@cdm_database_schema.drug_exposure de1 -group by de1.drug_concept_id, - YEAR(drug_exposure_start_date)*100 + month(drug_exposure_start_date) -; ---} - - - ---{703 IN (@list_of_analysis_ids)}?{ --- 703 Number of distinct drug exposure concepts per person -with rawData(count_value) as -( - select num_drugs as count_value - from - ( - select de1.person_id, COUNT_BIG(distinct de1.drug_concept_id) as num_drugs - from - @cdm_database_schema.drug_exposure de1 - group by de1.person_id - ) t0 -), -overallStats (avg_value, stdev_value, min_value, 
max_value, total) as -( - select CAST(avg(1.0 * count_value) AS FLOAT) as avg_value, - CAST(stdev(count_value) AS FLOAT) as stdev_value, - min(count_value) as min_value, - max(count_value) as max_value, - count_big(*) as total - from rawData -), -statsView (count_value, total, rn) as -( - select count_value, - count_big(*) as total, - row_number() over (order by count_value) as rn - FROM rawData - group by count_value -), -priorStats (count_value, total, accumulated) as -( - select s.count_value, s.total, sum(p.total) as accumulated - from statsView s - join statsView p on p.rn <= s.rn - group by s.count_value, s.total, s.rn -) -select 703 as analysis_id, - o.total as count_value, - o.min_value, - o.max_value, - o.avg_value, - o.stdev_value, - MIN(case when p.accumulated >= .50 * o.total then count_value else o.max_value end) as median_value, - MIN(case when p.accumulated >= .10 * o.total then count_value else o.max_value end) as p10_value, - MIN(case when p.accumulated >= .25 * o.total then count_value else o.max_value end) as p25_value, - MIN(case when p.accumulated >= .75 * o.total then count_value else o.max_value end) as p75_value, - MIN(case when p.accumulated >= .90 * o.total then count_value else o.max_value end) as p90_value -into #tempResults -from priorStats p -CROSS JOIN overallStats o -GROUP BY o.total, o.min_value, o.max_value, o.avg_value, o.stdev_value -; - -insert into @results_database_schema.ACHILLES_results_dist (analysis_id, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value) -select analysis_id, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value -from #tempResults -; - -truncate table #tempResults; -drop table #tempResults; - ---} - - - ---{704 IN (@list_of_analysis_ids)}?{ --- 704 Number of persons with at least one drug occurrence, by drug_concept_id by calendar year by gender by age decile -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, stratum_2, stratum_3, stratum_4, count_value) -select 704 as analysis_id, - CAST(de1.drug_concept_id AS VARCHAR(255)) as stratum_1, - CAST(YEAR(drug_exposure_start_date) AS VARCHAR(255)) as stratum_2, - CAST(p1.gender_concept_id AS VARCHAR(255)) as stratum_3, - CAST(floor((year(drug_exposure_start_date) - p1.year_of_birth)/10) AS VARCHAR(255)) as stratum_4, - COUNT_BIG(distinct p1.PERSON_ID) as count_value -from @cdm_database_schema.PERSON p1 -inner join -@cdm_database_schema.drug_exposure de1 -on p1.person_id = de1.person_id -group by de1.drug_concept_id, - YEAR(drug_exposure_start_date), - p1.gender_concept_id, - floor((year(drug_exposure_start_date) - p1.year_of_birth)/10) -; ---} - ---{705 IN (@list_of_analysis_ids)}?{ --- 705 Number of drug occurrence records, by drug_concept_id by drug_type_concept_id -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, stratum_2, count_value) -select 705 as analysis_id, - CAST(de1.drug_CONCEPT_ID AS VARCHAR(255)) as stratum_1, - CAST(de1.drug_type_concept_id AS VARCHAR(255)) as stratum_2, - COUNT_BIG(de1.PERSON_ID) as count_value -from - @cdm_database_schema.drug_exposure de1 -group by de1.drug_CONCEPT_ID, - de1.drug_type_concept_id -; ---} - - - ---{706 IN (@list_of_analysis_ids)}?{ --- 706 Distribution of age by drug_concept_id -select de1.drug_concept_id as subject_id, - p1.gender_concept_id, - de1.drug_start_year - p1.year_of_birth as count_value -INTO #rawData_706 -from @cdm_database_schema.PERSON p1 
-inner join -( - select person_id, drug_concept_id, min(year(drug_exposure_start_date)) as drug_start_year - from @cdm_database_schema.drug_exposure - group by person_id, drug_concept_id -) de1 on p1.person_id = de1.person_id -; - -with overallStats (stratum1_id, stratum2_id, avg_value, stdev_value, min_value, max_value, total) as -( - select subject_id as stratum1_id, - gender_concept_id as stratum2_id, - CAST(avg(1.0 * count_value) AS FLOAT) as avg_value, - CAST(stdev(count_value) AS FLOAT) as stdev_value, - min(count_value) as min_value, - max(count_value) as max_value, - count_big(*) as total - FROM #rawData_706 - group by subject_id, gender_concept_id -), -statsView (stratum1_id, stratum2_id, count_value, total, rn) as -( - select subject_id as stratum1_id, gender_concept_id as stratum2_id, count_value, count_big(*) as total, row_number() over (partition by subject_id, gender_concept_id order by count_value) as rn - FROM #rawData_706 - group by subject_id, gender_concept_id, count_value -), -priorStats (stratum1_id, stratum2_id, count_value, total, accumulated) as -( - select s.stratum1_id, s.stratum2_id, s.count_value, s.total, sum(p.total) as accumulated - from statsView s - join statsView p on s.stratum1_id = p.stratum1_id and s.stratum2_id = p.stratum2_id and p.rn <= s.rn - group by s.stratum1_id, s.stratum2_id, s.count_value, s.total, s.rn -) -select 706 as analysis_id, - CAST(o.stratum1_id AS VARCHAR(255)) AS stratum1_id, - CAST(o.stratum2_id AS VARCHAR(255)) AS stratum2_id, - o.total as count_value, - o.min_value, - o.max_value, - o.avg_value, - o.stdev_value, - MIN(case when p.accumulated >= .50 * o.total then count_value else o.max_value end) as median_value, - MIN(case when p.accumulated >= .10 * o.total then count_value else o.max_value end) as p10_value, - MIN(case when p.accumulated >= .25 * o.total then count_value else o.max_value end) as p25_value, - MIN(case when p.accumulated >= .75 * o.total then count_value else o.max_value end) as p75_value, - MIN(case when p.accumulated >= .90 * o.total then count_value else o.max_value end) as p90_value -into #tempResults -from priorStats p -join overallStats o on p.stratum1_id = o.stratum1_id and p.stratum2_id = o.stratum2_id -GROUP BY o.stratum1_id, o.stratum2_id, o.total, o.min_value, o.max_value, o.avg_value, o.stdev_value -; - -insert into @results_database_schema.ACHILLES_results_dist (analysis_id, stratum_1, stratum_2, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value) -select analysis_id, stratum1_id, stratum2_id, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value -from #tempResults -; - - -truncate table #rawData_706; -drop table #rawData_706; - -truncate table #tempResults; -drop table #tempResults; - ---} - - - ---{709 IN (@list_of_analysis_ids)}?{ --- 709 Number of drug exposure records with invalid person_id -insert into @results_database_schema.ACHILLES_results (analysis_id, count_value) -select 709 as analysis_id, - COUNT_BIG(de1.PERSON_ID) as count_value -from - @cdm_database_schema.drug_exposure de1 - left join @cdm_database_schema.PERSON p1 - on p1.person_id = de1.person_id -where p1.person_id is null -; ---} - - ---{710 IN (@list_of_analysis_ids)}?{ --- 710 Number of drug exposure records outside valid observation period -insert into @results_database_schema.ACHILLES_results (analysis_id, count_value) -select 710 as analysis_id, - COUNT_BIG(de1.PERSON_ID) as count_value -from - 
@cdm_database_schema.drug_exposure de1 - left join @cdm_database_schema.observation_period op1 - on op1.person_id = de1.person_id - and de1.drug_exposure_start_date >= op1.observation_period_start_date - and de1.drug_exposure_start_date <= op1.observation_period_end_date -where op1.person_id is null -; ---} - - ---{711 IN (@list_of_analysis_ids)}?{ --- 711 Number of drug exposure records with end date < start date -insert into @results_database_schema.ACHILLES_results (analysis_id, count_value) -select 711 as analysis_id, - COUNT_BIG(de1.PERSON_ID) as count_value -from - @cdm_database_schema.drug_exposure de1 -where de1.drug_exposure_end_date < de1.drug_exposure_start_date -; ---} - - ---{712 IN (@list_of_analysis_ids)}?{ --- 712 Number of drug exposure records with invalid provider_id -insert into @results_database_schema.ACHILLES_results (analysis_id, count_value) -select 712 as analysis_id, - COUNT_BIG(de1.PERSON_ID) as count_value -from - @cdm_database_schema.drug_exposure de1 - left join @cdm_database_schema.provider p1 - on p1.provider_id = de1.provider_id -where de1.provider_id is not null - and p1.provider_id is null -; ---} - ---{713 IN (@list_of_analysis_ids)}?{ --- 713 Number of drug exposure records with invalid visit_id -insert into @results_database_schema.ACHILLES_results (analysis_id, count_value) -select 713 as analysis_id, - COUNT_BIG(de1.PERSON_ID) as count_value -from - @cdm_database_schema.drug_exposure de1 - left join @cdm_database_schema.visit_occurrence vo1 - on de1.visit_occurrence_id = vo1.visit_occurrence_id -where de1.visit_occurrence_id is not null - and vo1.visit_occurrence_id is null -; ---} - - - ---{715 IN (@list_of_analysis_ids)}?{ --- 715 Distribution of days_supply by drug_concept_id -with rawData(stratum_id, count_value) as -( - select drug_concept_id, - days_supply as count_value - from @cdm_database_schema.drug_exposure - where days_supply is not null -), -overallStats (stratum_id, avg_value, stdev_value, min_value, max_value, total) as -( - select stratum_id, - CAST(avg(1.0 * count_value) AS FLOAT) as avg_value, - CAST(stdev(count_value) AS FLOAT) as stdev_value, - min(count_value) as min_value, - max(count_value) as max_value, - count_big(*) as total - FROM rawData - group by stratum_id -), -statsView (stratum_id, count_value, total, rn) as -( - select stratum_id, count_value, count_big(*) as total, row_number() over (order by count_value) as rn - FROM rawData - group by stratum_id, count_value -), -priorStats (stratum_id, count_value, total, accumulated) as -( - select s.stratum_id, s.count_value, s.total, sum(p.total) as accumulated - from statsView s - join statsView p on s.stratum_id = p.stratum_id and p.rn <= s.rn - group by s.stratum_id, s.count_value, s.total, s.rn -) -select 715 as analysis_id, - CAST(o.stratum_id AS VARCHAR(255)) AS stratum_id, - o.total as count_value, - o.min_value, - o.max_value, - o.avg_value, - o.stdev_value, - MIN(case when p.accumulated >= .50 * o.total then count_value else o.max_value end) as median_value, - MIN(case when p.accumulated >= .10 * o.total then count_value else o.max_value end) as p10_value, - MIN(case when p.accumulated >= .25 * o.total then count_value else o.max_value end) as p25_value, - MIN(case when p.accumulated >= .75 * o.total then count_value else o.max_value end) as p75_value, - MIN(case when p.accumulated >= .90 * o.total then count_value else o.max_value end) as p90_value -into #tempResults -from priorStats p -join overallStats o on p.stratum_id = o.stratum_id -GROUP BY o.stratum_id, 
o.total, o.min_value, o.max_value, o.avg_value, o.stdev_value -; - -insert into @results_database_schema.ACHILLES_results_dist (analysis_id, stratum_1, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value) -select analysis_id, stratum_id, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value -from #tempResults -; - -truncate table #tempResults; -drop table #tempResults; - ---} - - ---{716 IN (@list_of_analysis_ids)}?{ --- 716 Distribution of refills by drug_concept_id -with rawData(stratum_id, count_value) as -( - select drug_concept_id, - refills as count_value - from @cdm_database_schema.drug_exposure - where refills is not null -), -overallStats (stratum_id, avg_value, stdev_value, min_value, max_value, total) as -( - select stratum_id, - CAST(avg(1.0 * count_value) AS FLOAT) as avg_value, - CAST(stdev(count_value) AS FLOAT) as stdev_value, - min(count_value) as min_value, - max(count_value) as max_value, - count_big(*) as total - FROM rawData - group by stratum_id -), -statsView (stratum_id, count_value, total, rn) as -( - select stratum_id, count_value, count_big(*) as total, row_number() over (order by count_value) as rn - FROM rawData - group by stratum_id, count_value -), -priorStats (stratum_id, count_value, total, accumulated) as -( - select s.stratum_id, s.count_value, s.total, sum(p.total) as accumulated - from statsView s - join statsView p on s.stratum_id = p.stratum_id and p.rn <= s.rn - group by s.stratum_id, s.count_value, s.total, s.rn -) -select 716 as analysis_id, - CAST(o.stratum_id AS VARCHAR(255)) AS stratum_id, - o.total as count_value, - o.min_value, - o.max_value, - o.avg_value, - o.stdev_value, - MIN(case when p.accumulated >= .50 * o.total then count_value else o.max_value end) as median_value, - MIN(case when p.accumulated >= .10 * o.total then count_value else o.max_value end) as p10_value, - MIN(case when p.accumulated >= .25 * o.total then count_value else o.max_value end) as p25_value, - MIN(case when p.accumulated >= .75 * o.total then count_value else o.max_value end) as p75_value, - MIN(case when p.accumulated >= .90 * o.total then count_value else o.max_value end) as p90_value -into #tempResults -from priorStats p -join overallStats o on p.stratum_id = o.stratum_id -GROUP BY o.stratum_id, o.total, o.min_value, o.max_value, o.avg_value, o.stdev_value -; - -insert into @results_database_schema.ACHILLES_results_dist (analysis_id, stratum_1, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value) -select analysis_id, stratum_id, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value -from #tempResults -; - -truncate table #tempResults; -drop table #tempResults; - ---} - - - ---{717 IN (@list_of_analysis_ids)}?{ --- 717 Distribution of quantity by drug_concept_id -with rawData(stratum_id, count_value) as -( - select drug_concept_id, - CAST(quantity AS FLOAT) as count_value - from @cdm_database_schema.drug_exposure - where quantity is not null -), -overallStats (stratum_id, avg_value, stdev_value, min_value, max_value, total) as -( - select stratum_id, - CAST(avg(1.0 * count_value) AS FLOAT) as avg_value, - CAST(stdev(count_value) AS FLOAT) as stdev_value, - min(count_value) as min_value, - max(count_value) as max_value, - count_big(*) as total - FROM rawData - group by stratum_id -), -statsView (stratum_id, 
count_value, total, rn) as -( - select stratum_id, count_value, count_big(*) as total, row_number() over (order by count_value) as rn - FROM rawData - group by stratum_id, count_value -), -priorStats (stratum_id, count_value, total, accumulated) as -( - select s.stratum_id, s.count_value, s.total, sum(p.total) as accumulated - from statsView s - join statsView p on s.stratum_id = p.stratum_id and p.rn <= s.rn - group by s.stratum_id, s.count_value, s.total, s.rn -) -select 717 as analysis_id, - CAST(o.stratum_id AS VARCHAR(255)) AS stratum_id, - o.total as count_value, - o.min_value, - o.max_value, - o.avg_value, - o.stdev_value, - MIN(case when p.accumulated >= .50 * o.total then count_value else o.max_value end) as median_value, - MIN(case when p.accumulated >= .10 * o.total then count_value else o.max_value end) as p10_value, - MIN(case when p.accumulated >= .25 * o.total then count_value else o.max_value end) as p25_value, - MIN(case when p.accumulated >= .75 * o.total then count_value else o.max_value end) as p75_value, - MIN(case when p.accumulated >= .90 * o.total then count_value else o.max_value end) as p90_value -into #tempResults -from priorStats p -join overallStats o on p.stratum_id = o.stratum_id -GROUP BY o.stratum_id, o.total, o.min_value, o.max_value, o.avg_value, o.stdev_value -; - -insert into @results_database_schema.ACHILLES_results_dist (analysis_id, stratum_1, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value) -select analysis_id, stratum_id, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value -from #tempResults -; - -truncate table #tempResults; -drop table #tempResults; - - ---} - - ---{720 IN (@list_of_analysis_ids)}?{ --- 720 Number of drug exposure records by condition occurrence start month -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, count_value) -select 720 as analysis_id, - CAST(YEAR(drug_exposure_start_date)*100 + month(drug_exposure_start_date) AS VARCHAR(255)) as stratum_1, - COUNT_BIG(PERSON_ID) as count_value -from -@cdm_database_schema.drug_exposure de1 -group by YEAR(drug_exposure_start_date)*100 + month(drug_exposure_start_date) -; ---} - ---{791 IN (@list_of_analysis_ids)}?{ --- 791 Number of total persons that have at least x drug exposures -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, stratum_2, count_value) -select - 791 as analysis_id, - CAST(drug_concept_id AS VARCHAR(255)) as stratum_1, - CAST(drg_cnt AS VARCHAR(255)) as stratum_2, - sum(count(person_id)) over (partition by drug_concept_id order by drg_cnt desc) as count_value -from ( - select - d.drug_concept_id, - count(d.drug_exposure_id) as drg_cnt, - d.person_id - from @cdm_database_schema.drug_exposure d - group by d.person_id, d.drug_concept_id -) cnt_q -group by cnt_q.drug_concept_id, cnt_q.drg_cnt; ---} - -/******************************************** - -ACHILLES Analyses on OBSERVATION table - -*********************************************/ - - - ---{800 IN (@list_of_analysis_ids)}?{ --- 800 Number of persons with at least one observation occurrence, by observation_concept_id -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, count_value) -select 800 as analysis_id, - CAST(o1.observation_CONCEPT_ID AS VARCHAR(255)) as stratum_1, - COUNT_BIG(distinct o1.PERSON_ID) as count_value -from - @cdm_database_schema.observation o1 -group by o1.observation_CONCEPT_ID -; 
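-- A note on the templating used throughout this script: sections wrapped in
-- --{<analysis_id> IN (@list_of_analysis_ids)}?{ ... --} are conditional blocks that are
-- kept only when that analysis id appears in the list_of_analysis_ids parameter, and
-- tokens such as @cdm_database_schema and @results_database_schema are replaced with the
-- schema names supplied when the SQL is rendered (SqlRender's parameter and if-then syntax).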
---} - - ---{801 IN (@list_of_analysis_ids)}?{ --- 801 Number of observation occurrence records, by observation_concept_id -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, count_value) -select 801 as analysis_id, - CAST(o1.observation_CONCEPT_ID AS VARCHAR(255)) as stratum_1, - COUNT_BIG(o1.PERSON_ID) as count_value -from - @cdm_database_schema.observation o1 -group by o1.observation_CONCEPT_ID -; ---} - - - ---{802 IN (@list_of_analysis_ids)}?{ --- 802 Number of persons by observation occurrence start month, by observation_concept_id -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, stratum_2, count_value) -select 802 as analysis_id, - CAST(o1.observation_concept_id AS VARCHAR(255)) as stratum_1, - CAST(YEAR(observation_date)*100 + month(observation_date) AS VARCHAR(255)) as stratum_2, - COUNT_BIG(distinct PERSON_ID) as count_value -from -@cdm_database_schema.observation o1 -group by o1.observation_concept_id, - YEAR(observation_date)*100 + month(observation_date) -; ---} - - - ---{803 IN (@list_of_analysis_ids)}?{ --- 803 Number of distinct observation occurrence concepts per person -with rawData(count_value) as -( - select num_observations as count_value - from - ( - select o1.person_id, COUNT_BIG(distinct o1.observation_concept_id) as num_observations - from - @cdm_database_schema.observation o1 - group by o1.person_id - ) t0 -), -overallStats (avg_value, stdev_value, min_value, max_value, total) as -( - select CAST(avg(1.0 * count_value) AS FLOAT) as avg_value, - CAST(stdev(count_value) AS FLOAT) as stdev_value, - min(count_value) as min_value, - max(count_value) as max_value, - count_big(*) as total - from rawData -), -statsView (count_value, total, rn) as -( - select count_value, - count_big(*) as total, - row_number() over (order by count_value) as rn - FROM rawData - group by count_value -), -priorStats (count_value, total, accumulated) as -( - select s.count_value, s.total, sum(p.total) as accumulated - from statsView s - join statsView p on p.rn <= s.rn - group by s.count_value, s.total, s.rn -) -select 803 as analysis_id, - o.total as count_value, - o.min_value, - o.max_value, - o.avg_value, - o.stdev_value, - MIN(case when p.accumulated >= .50 * o.total then count_value else o.max_value end) as median_value, - MIN(case when p.accumulated >= .10 * o.total then count_value else o.max_value end) as p10_value, - MIN(case when p.accumulated >= .25 * o.total then count_value else o.max_value end) as p25_value, - MIN(case when p.accumulated >= .75 * o.total then count_value else o.max_value end) as p75_value, - MIN(case when p.accumulated >= .90 * o.total then count_value else o.max_value end) as p90_value -into #tempResults -from priorStats p -CROSS JOIN overallStats o -GROUP BY o.total, o.min_value, o.max_value, o.avg_value, o.stdev_value -; - -insert into @results_database_schema.ACHILLES_results_dist (analysis_id, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value) -select analysis_id, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value -from #tempResults -; - -truncate table #tempResults; - -drop table #tempResults; - - ---} - - - ---{804 IN (@list_of_analysis_ids)}?{ --- 804 Number of persons with at least one observation occurrence, by observation_concept_id by calendar year by gender by age decile -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, stratum_2, 
stratum_3, stratum_4, count_value) -select 804 as analysis_id, - CAST(o1.observation_concept_id AS VARCHAR(255)) as stratum_1, - CAST(YEAR(observation_date) AS VARCHAR(255)) as stratum_2, - CAST(p1.gender_concept_id AS VARCHAR(255)) as stratum_3, - CAST(floor((year(observation_date) - p1.year_of_birth)/10) AS VARCHAR(255)) as stratum_4, - COUNT_BIG(distinct p1.PERSON_ID) as count_value -from @cdm_database_schema.PERSON p1 -inner join -@cdm_database_schema.observation o1 -on p1.person_id = o1.person_id -group by o1.observation_concept_id, - YEAR(observation_date), - p1.gender_concept_id, - floor((year(observation_date) - p1.year_of_birth)/10) -; ---} - ---{805 IN (@list_of_analysis_ids)}?{ --- 805 Number of observation occurrence records, by observation_concept_id by observation_type_concept_id -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, stratum_2, count_value) -select 805 as analysis_id, - CAST(o1.observation_CONCEPT_ID AS VARCHAR(255)) as stratum_1, - CAST(o1.observation_type_concept_id AS VARCHAR(255)) as stratum_2, - COUNT_BIG(o1.PERSON_ID) as count_value -from - @cdm_database_schema.observation o1 -group by o1.observation_CONCEPT_ID, - o1.observation_type_concept_id -; ---} - - - ---{806 IN (@list_of_analysis_ids)}?{ --- 806 Distribution of age by observation_concept_id -select o1.observation_concept_id as subject_id, - p1.gender_concept_id, - o1.observation_start_year - p1.year_of_birth as count_value -INTO #rawData_806 -from @cdm_database_schema.PERSON p1 -inner join -( - select person_id, observation_concept_id, min(year(observation_date)) as observation_start_year - from @cdm_database_schema.observation - group by person_id, observation_concept_id -) o1 -on p1.person_id = o1.person_id -; - -with overallStats (stratum1_id, stratum2_id, avg_value, stdev_value, min_value, max_value, total) as -( - select subject_id as stratum1_id, - gender_concept_id as stratum2_id, - CAST(avg(1.0 * count_value) AS FLOAT) as avg_value, - CAST(stdev(count_value) AS FLOAT) as stdev_value, - min(count_value) as min_value, - max(count_value) as max_value, - count_big(*) as total - FROM #rawData_806 - group by subject_id, gender_concept_id -), -statsView (stratum1_id, stratum2_id, count_value, total, rn) as -( - select subject_id as stratum1_id, gender_concept_id as stratum2_id, count_value, count_big(*) as total, row_number() over (partition by subject_id, gender_concept_id order by count_value) as rn - FROM #rawData_806 - group by subject_id, gender_concept_id, count_value -), -priorStats (stratum1_id, stratum2_id, count_value, total, accumulated) as -( - select s.stratum1_id, s.stratum2_id, s.count_value, s.total, sum(p.total) as accumulated - from statsView s - join statsView p on s.stratum1_id = p.stratum1_id and s.stratum2_id = p.stratum2_id and p.rn <= s.rn - group by s.stratum1_id, s.stratum2_id, s.count_value, s.total, s.rn -) -select 806 as analysis_id, - CAST(o.stratum1_id AS VARCHAR(255)) AS stratum1_id, - CAST(o.stratum2_id AS VARCHAR(255)) AS stratum2_id, - o.total as count_value, - o.min_value, - o.max_value, - o.avg_value, - o.stdev_value, - MIN(case when p.accumulated >= .50 * o.total then count_value else o.max_value end) as median_value, - MIN(case when p.accumulated >= .10 * o.total then count_value else o.max_value end) as p10_value, - MIN(case when p.accumulated >= .25 * o.total then count_value else o.max_value end) as p25_value, - MIN(case when p.accumulated >= .75 * o.total then count_value else o.max_value end) as p75_value, - MIN(case when 
p.accumulated >= .90 * o.total then count_value else o.max_value end) as p90_value -into #tempResults -from priorStats p -join overallStats o on p.stratum1_id = o.stratum1_id and p.stratum2_id = o.stratum2_id -GROUP BY o.stratum1_id, o.stratum2_id, o.total, o.min_value, o.max_value, o.avg_value, o.stdev_value -; - -insert into @results_database_schema.ACHILLES_results_dist (analysis_id, stratum_1, stratum_2, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value) -select analysis_id, stratum1_id, stratum2_id, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value -from #tempResults -; - -truncate table #rawData_806; -drop table #rawData_806; - -truncate table #tempResults; -drop table #tempResults; - - ---} - ---{807 IN (@list_of_analysis_ids)}?{ --- 807 Number of observation occurrence records, by observation_concept_id and unit_concept_id -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, stratum_2, count_value) -select 807 as analysis_id, - CAST(o1.observation_CONCEPT_ID AS VARCHAR(255)) as stratum_1, - CAST(o1.unit_concept_id AS VARCHAR(255)) as stratum_2, - COUNT_BIG(o1.PERSON_ID) as count_value -from - @cdm_database_schema.observation o1 -group by o1.observation_CONCEPT_ID, - o1.unit_concept_id -; ---} - - - - - ---{809 IN (@list_of_analysis_ids)}?{ --- 809 Number of observation records with invalid person_id -insert into @results_database_schema.ACHILLES_results (analysis_id, count_value) -select 809 as analysis_id, - COUNT_BIG(o1.PERSON_ID) as count_value -from - @cdm_database_schema.observation o1 - left join @cdm_database_schema.PERSON p1 - on p1.person_id = o1.person_id -where p1.person_id is null -; ---} - - ---{810 IN (@list_of_analysis_ids)}?{ --- 810 Number of observation records outside valid observation period -insert into @results_database_schema.ACHILLES_results (analysis_id, count_value) -select 810 as analysis_id, - COUNT_BIG(o1.PERSON_ID) as count_value -from - @cdm_database_schema.observation o1 - left join @cdm_database_schema.observation_period op1 - on op1.person_id = o1.person_id - and o1.observation_date >= op1.observation_period_start_date - and o1.observation_date <= op1.observation_period_end_date -where op1.person_id is null -; ---} - - - ---{812 IN (@list_of_analysis_ids)}?{ --- 812 Number of observation records with invalid provider_id -insert into @results_database_schema.ACHILLES_results (analysis_id, count_value) -select 812 as analysis_id, - COUNT_BIG(o1.PERSON_ID) as count_value -from - @cdm_database_schema.observation o1 - left join @cdm_database_schema.provider p1 - on p1.provider_id = o1.provider_id -where o1.provider_id is not null - and p1.provider_id is null -; ---} - ---{813 IN (@list_of_analysis_ids)}?{ --- 813 Number of observation records with invalid visit_id -insert into @results_database_schema.ACHILLES_results (analysis_id, count_value) -select 813 as analysis_id, - COUNT_BIG(o1.PERSON_ID) as count_value -from - @cdm_database_schema.observation o1 - left join @cdm_database_schema.visit_occurrence vo1 - on o1.visit_occurrence_id = vo1.visit_occurrence_id -where o1.visit_occurrence_id is not null - and vo1.visit_occurrence_id is null -; ---} - - ---{814 IN (@list_of_analysis_ids)}?{ --- 814 Number of observation records with no value (numeric, string, or concept) -insert into @results_database_schema.ACHILLES_results (analysis_id, count_value) -select 814 as analysis_id, - 
COUNT_BIG(o1.PERSON_ID) as count_value -from - @cdm_database_schema.observation o1 -where o1.value_as_number is null - and o1.value_as_string is null - and o1.value_as_concept_id is null -; ---} - - ---{815 IN (@list_of_analysis_ids)}?{ --- 815 Distribution of numeric values, by observation_concept_id and unit_concept_id -select observation_concept_id as subject_id, - unit_concept_id, - CAST(value_as_number AS FLOAT) as count_value -INTO #rawData_815 -from @cdm_database_schema.observation o1 -where o1.unit_concept_id is not null - and o1.value_as_number is not null -; - -with overallStats (stratum1_id, stratum2_id, avg_value, stdev_value, min_value, max_value, total) as -( - select subject_id as stratum1_id, - unit_concept_id as stratum2_id, - CAST(avg(1.0 * count_value) AS FLOAT) as avg_value, - CAST(stdev(count_value) AS FLOAT) as stdev_value, - min(count_value) as min_value, - max(count_value) as max_value, - count_big(*) as total - FROM #rawData_815 - group by subject_id, unit_concept_id -), -statsView (stratum1_id, stratum2_id, count_value, total, rn) as -( - select subject_id as stratum1_id, unit_concept_id as stratum2_id, count_value, count_big(*) as total, row_number() over (partition by subject_id, unit_concept_id order by count_value) as rn - FROM #rawData_815 - group by subject_id, unit_concept_id, count_value -), -priorStats (stratum1_id, stratum2_id, count_value, total, accumulated) as -( - select s.stratum1_id, s.stratum2_id, s.count_value, s.total, sum(p.total) as accumulated - from statsView s - join statsView p on s.stratum1_id = p.stratum1_id and s.stratum2_id = p.stratum2_id and p.rn <= s.rn - group by s.stratum1_id, s.stratum2_id, s.count_value, s.total, s.rn -) -select 815 as analysis_id, - CAST(o.stratum1_id AS VARCHAR(255)) AS stratum1_id, - CAST(o.stratum2_id AS VARCHAR(255)) AS stratum2_id, - o.total as count_value, - o.min_value, - o.max_value, - o.avg_value, - o.stdev_value, - MIN(case when p.accumulated >= .50 * o.total then count_value else o.max_value end) as median_value, - MIN(case when p.accumulated >= .10 * o.total then count_value else o.max_value end) as p10_value, - MIN(case when p.accumulated >= .25 * o.total then count_value else o.max_value end) as p25_value, - MIN(case when p.accumulated >= .75 * o.total then count_value else o.max_value end) as p75_value, - MIN(case when p.accumulated >= .90 * o.total then count_value else o.max_value end) as p90_value -into #tempResults -from priorStats p -join overallStats o on p.stratum1_id = o.stratum1_id and p.stratum2_id = o.stratum2_id -GROUP BY o.stratum1_id, o.stratum2_id, o.total, o.min_value, o.max_value, o.avg_value, o.stdev_value -; - -insert into @results_database_schema.ACHILLES_results_dist (analysis_id, stratum_1, stratum_2, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value) -select analysis_id, stratum1_id, stratum2_id, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value -from #tempResults -; - -truncate table #rawData_815; -drop table #rawData_815; - -truncate table #tempResults; -drop table #tempResults; - ---} - - ---{816 IN (@list_of_analysis_ids)}?{ --- 816 Distribution of low range, by observation_concept_id and unit_concept_id - ---NOT APPLICABLE FOR OMOP CDM v5 - ---} - - ---{817 IN (@list_of_analysis_ids)}?{ --- 817 Distribution of high range, by observation_concept_id and unit_concept_id - ---NOT APPLICABLE FOR OMOP CDM v5 - ---} - - - ---{818 IN 
(@list_of_analysis_ids)}?{ --- 818 Number of observation records below/within/above normal range, by observation_concept_id and unit_concept_id - ---NOT APPLICABLE FOR OMOP CDM v5 - ---} - - - ---{820 IN (@list_of_analysis_ids)}?{ --- 820 Number of observation records by condition occurrence start month -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, count_value) -select 820 as analysis_id, - CAST(YEAR(observation_date)*100 + month(observation_date) AS VARCHAR(255)) as stratum_1, - COUNT_BIG(PERSON_ID) as count_value -from -@cdm_database_schema.observation o1 -group by YEAR(observation_date)*100 + month(observation_date) -; ---} - - - ---{891 IN (@list_of_analysis_ids)}?{ --- 891 Number of total persons that have at least x observations -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, stratum_2, count_value) -select - 891 as analysis_id, - CAST(observation_concept_id AS VARCHAR(255)) as stratum_1, - CAST(obs_cnt AS VARCHAR(255)) as stratum_2, - sum(count(person_id)) over (partition by observation_concept_id order by obs_cnt desc) as count_value -from ( - select - o.observation_concept_id, - count(o.observation_id) as obs_cnt, - o.person_id - from @cdm_database_schema.observation o - group by o.person_id, o.observation_concept_id -) cnt_q -group by cnt_q.observation_concept_id, cnt_q.obs_cnt; ---} - - - -/******************************************** - -ACHILLES Analyses on DRUG_ERA table - -*********************************************/ - - ---{900 IN (@list_of_analysis_ids)}?{ --- 900 Number of persons with at least one drug occurrence, by drug_concept_id -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, count_value) -select 900 as analysis_id, - CAST(de1.drug_CONCEPT_ID AS VARCHAR(255)) as stratum_1, - COUNT_BIG(distinct de1.PERSON_ID) as count_value -from - @cdm_database_schema.drug_era de1 -group by de1.drug_CONCEPT_ID -; ---} - - ---{901 IN (@list_of_analysis_ids)}?{ --- 901 Number of drug occurrence records, by drug_concept_id -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, count_value) -select 901 as analysis_id, - CAST(de1.drug_CONCEPT_ID AS VARCHAR(255)) as stratum_1, - COUNT_BIG(de1.PERSON_ID) as count_value -from - @cdm_database_schema.drug_era de1 -group by de1.drug_CONCEPT_ID -; ---} - - - ---{902 IN (@list_of_analysis_ids)}?{ --- 902 Number of persons by drug occurrence start month, by drug_concept_id -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, stratum_2, count_value) -select 902 as analysis_id, - CAST(de1.drug_concept_id AS VARCHAR(255)) as stratum_1, - CAST(YEAR(drug_era_start_date)*100 + month(drug_era_start_date) AS VARCHAR(255)) as stratum_2, - COUNT_BIG(distinct PERSON_ID) as count_value -from -@cdm_database_schema.drug_era de1 -group by de1.drug_concept_id, - YEAR(drug_era_start_date)*100 + month(drug_era_start_date) -; ---} - - - ---{903 IN (@list_of_analysis_ids)}?{ --- 903 Number of distinct drug era concepts per person -with rawData(count_value) as -( - select COUNT_BIG(distinct de1.drug_concept_id) as count_value - from @cdm_database_schema.drug_era de1 - group by de1.person_id -), -overallStats (avg_value, stdev_value, min_value, max_value, total) as -( - select CAST(avg(1.0 * count_value) AS FLOAT) as avg_value, - CAST(stdev(count_value) AS FLOAT) as stdev_value, - min(count_value) as min_value, - max(count_value) as max_value, - count_big(*) as total - from rawData -), -statsView (count_value, total, rn) 
as -( - select count_value, - count_big(*) as total, - row_number() over (order by count_value) as rn - FROM rawData - group by count_value -), -priorStats (count_value, total, accumulated) as -( - select s.count_value, s.total, sum(p.total) as accumulated - from statsView s - join statsView p on p.rn <= s.rn - group by s.count_value, s.total, s.rn -) -select 903 as analysis_id, - o.total as count_value, - o.min_value, - o.max_value, - o.avg_value, - o.stdev_value, - MIN(case when p.accumulated >= .50 * o.total then count_value else o.max_value end) as median_value, - MIN(case when p.accumulated >= .10 * o.total then count_value else o.max_value end) as p10_value, - MIN(case when p.accumulated >= .25 * o.total then count_value else o.max_value end) as p25_value, - MIN(case when p.accumulated >= .75 * o.total then count_value else o.max_value end) as p75_value, - MIN(case when p.accumulated >= .90 * o.total then count_value else o.max_value end) as p90_value -into #tempResults -from priorStats p -CROSS JOIN overallStats o -GROUP BY o.total, o.min_value, o.max_value, o.avg_value, o.stdev_value -; - -insert into @results_database_schema.ACHILLES_results_dist (analysis_id, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value) -select analysis_id, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value -from #tempResults -; - -truncate table #tempResults; -drop table #tempResults; - - ---} - - - ---{904 IN (@list_of_analysis_ids)}?{ --- 904 Number of persons with at least one drug occurrence, by drug_concept_id by calendar year by gender by age decile -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, stratum_2, stratum_3, stratum_4, count_value) -select 904 as analysis_id, - CAST(de1.drug_concept_id AS VARCHAR(255)) as stratum_1, - CAST(YEAR(drug_era_start_date) AS VARCHAR(255)) as stratum_2, - CAST(p1.gender_concept_id AS VARCHAR(255)) as stratum_3, - CAST(floor((year(drug_era_start_date) - p1.year_of_birth)/10) AS VARCHAR(255)) as stratum_4, - COUNT_BIG(distinct p1.PERSON_ID) as count_value -from @cdm_database_schema.PERSON p1 -inner join -@cdm_database_schema.drug_era de1 -on p1.person_id = de1.person_id -group by de1.drug_concept_id, - YEAR(drug_era_start_date), - p1.gender_concept_id, - floor((year(drug_era_start_date) - p1.year_of_birth)/10) -; ---} - - - - ---{906 IN (@list_of_analysis_ids)}?{ --- 906 Distribution of age by drug_concept_id -select de.drug_concept_id as subject_id, - p1.gender_concept_id, - de.drug_start_year - p1.year_of_birth as count_value -INTO #rawData_906 -from @cdm_database_schema.PERSON p1 -inner join -( - select person_id, drug_concept_id, min(year(drug_era_start_date)) as drug_start_year - from @cdm_database_schema.drug_era - group by person_id, drug_concept_id -) de on p1.person_id =de.person_id -; - -with overallStats (stratum1_id, stratum2_id, avg_value, stdev_value, min_value, max_value, total) as -( - select subject_id as stratum1_id, - gender_concept_id as stratum2_id, - CAST(avg(1.0 * count_value) AS FLOAT) as avg_value, - CAST(stdev(count_value) AS FLOAT) as stdev_value, - min(count_value) as min_value, - max(count_value) as max_value, - count_big(*) as total - FROM #rawData_906 - group by subject_id, gender_concept_id -), -statsView (stratum1_id, stratum2_id, count_value, total, rn) as -( - select subject_id as stratum1_id, gender_concept_id as stratum2_id, count_value, count_big(*) as total, 
row_number() over (partition by subject_id, gender_concept_id order by count_value) as rn - FROM #rawData_906 - group by subject_id, gender_concept_id, count_value -), -priorStats (stratum1_id, stratum2_id, count_value, total, accumulated) as -( - select s.stratum1_id, s.stratum2_id, s.count_value, s.total, sum(p.total) as accumulated - from statsView s - join statsView p on s.stratum1_id = p.stratum1_id and s.stratum2_id = p.stratum2_id and p.rn <= s.rn - group by s.stratum1_id, s.stratum2_id, s.count_value, s.total, s.rn -) -select 906 as analysis_id, - CAST(o.stratum1_id AS VARCHAR(255)) AS stratum1_id, - CAST(o.stratum2_id AS VARCHAR(255)) AS stratum2_id, - o.total as count_value, - o.min_value, - o.max_value, - o.avg_value, - o.stdev_value, - MIN(case when p.accumulated >= .50 * o.total then count_value else o.max_value end) as median_value, - MIN(case when p.accumulated >= .10 * o.total then count_value else o.max_value end) as p10_value, - MIN(case when p.accumulated >= .25 * o.total then count_value else o.max_value end) as p25_value, - MIN(case when p.accumulated >= .75 * o.total then count_value else o.max_value end) as p75_value, - MIN(case when p.accumulated >= .90 * o.total then count_value else o.max_value end) as p90_value -into #tempResults -from priorStats p -join overallStats o on p.stratum1_id = o.stratum1_id and p.stratum2_id = o.stratum2_id -GROUP BY o.stratum1_id, o.stratum2_id, o.total, o.min_value, o.max_value, o.avg_value, o.stdev_value -; - -insert into @results_database_schema.ACHILLES_results_dist (analysis_id, stratum_1, stratum_2, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value) -select analysis_id, stratum1_id, stratum2_id, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value -from #tempResults -; - - -truncate table #rawData_906; -drop table #rawData_906; - -truncate table #tempResults; -drop table #tempResults; ---} - - ---{907 IN (@list_of_analysis_ids)}?{ --- 907 Distribution of drug era length, by drug_concept_id -with rawData(stratum1_id, count_value) as -( - select drug_concept_id, - datediff(dd,drug_era_start_date, drug_era_end_date) as count_value - from @cdm_database_schema.drug_era de1 -), -overallStats (stratum1_id, avg_value, stdev_value, min_value, max_value, total) as -( - select stratum1_id, - CAST(avg(1.0 * count_value) AS FLOAT) as avg_value, - CAST(stdev(count_value) AS FLOAT) as stdev_value, - min(count_value) as min_value, - max(count_value) as max_value, - count_big(*) as total - from rawData - group by stratum1_id -), -statsView (stratum1_id, count_value, total, rn) as -( - select stratum1_id, - count_value, - count_big(*) as total, - row_number() over (partition by stratum1_id order by count_value) as rn - FROM rawData - group by stratum1_id, count_value -), -priorStats (stratum1_id, count_value, total, accumulated) as -( - select s.stratum1_id, s.count_value, s.total, sum(p.total) as accumulated - from statsView s - join statsView p on s.stratum1_id = p.stratum1_id and p.rn <= s.rn - group by s.stratum1_id, s.count_value, s.total, s.rn -) -select 907 as analysis_id, - CAST(p.stratum1_id AS VARCHAR(255)) as stratum_1, - o.total as count_value, - o.min_value, - o.max_value, - o.avg_value, - o.stdev_value, - MIN(case when p.accumulated >= .50 * o.total then count_value else o.max_value end) as median_value, - MIN(case when p.accumulated >= .10 * o.total then count_value else o.max_value end) as p10_value, - 
MIN(case when p.accumulated >= .25 * o.total then count_value else o.max_value end) as p25_value, - MIN(case when p.accumulated >= .75 * o.total then count_value else o.max_value end) as p75_value, - MIN(case when p.accumulated >= .90 * o.total then count_value else o.max_value end) as p90_value -into #tempResults -from priorStats p -join overallStats o on p.stratum1_id = o.stratum1_id -GROUP BY p.stratum1_id, o.total, o.min_value, o.max_value, o.avg_value, o.stdev_value -; - -insert into @results_database_schema.ACHILLES_results_dist (analysis_id, stratum_1, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value) -select analysis_id, stratum_1, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value -from #tempResults -; - -truncate table #tempResults; -drop table #tempResults; - ---} - - - ---{908 IN (@list_of_analysis_ids)}?{ --- 908 Number of drug eras with invalid person -insert into @results_database_schema.ACHILLES_results (analysis_id, count_value) -select 908 as analysis_id, - COUNT_BIG(de1.PERSON_ID) as count_value -from - @cdm_database_schema.drug_era de1 - left join @cdm_database_schema.PERSON p1 - on p1.person_id = de1.person_id -where p1.person_id is null -; ---} - - ---{909 IN (@list_of_analysis_ids)}?{ --- 909 Number of drug eras outside valid observation period -insert into @results_database_schema.ACHILLES_results (analysis_id, count_value) -select 909 as analysis_id, - COUNT_BIG(de1.PERSON_ID) as count_value -from - @cdm_database_schema.drug_era de1 - left join @cdm_database_schema.observation_period op1 - on op1.person_id = de1.person_id - and de1.drug_era_start_date >= op1.observation_period_start_date - and de1.drug_era_start_date <= op1.observation_period_end_date -where op1.person_id is null -; ---} - - ---{910 IN (@list_of_analysis_ids)}?{ --- 910 Number of drug eras with end date < start date -insert into @results_database_schema.ACHILLES_results (analysis_id, count_value) -select 910 as analysis_id, - COUNT_BIG(de1.PERSON_ID) as count_value -from - @cdm_database_schema.drug_era de1 -where de1.drug_era_end_date < de1.drug_era_start_date -; ---} - - - ---{920 IN (@list_of_analysis_ids)}?{ --- 920 Number of drug era records by drug era start month -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, count_value) -select 920 as analysis_id, - CAST(YEAR(drug_era_start_date)*100 + month(drug_era_start_date) AS VARCHAR(255)) as stratum_1, - COUNT_BIG(PERSON_ID) as count_value -from -@cdm_database_schema.drug_era de1 -group by YEAR(drug_era_start_date)*100 + month(drug_era_start_date) -; ---} - - - - - -/******************************************** - -ACHILLES Analyses on CONDITION_ERA table - -*********************************************/ - - ---{1000 IN (@list_of_analysis_ids)}?{ --- 1000 Number of persons with at least one condition occurrence, by condition_concept_id -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, count_value) -select 1000 as analysis_id, - CAST(ce1.condition_CONCEPT_ID AS VARCHAR(255)) as stratum_1, - COUNT_BIG(distinct ce1.PERSON_ID) as count_value -from - @cdm_database_schema.condition_era ce1 -group by ce1.condition_CONCEPT_ID -; ---} - - ---{1001 IN (@list_of_analysis_ids)}?{ --- 1001 Number of condition occurrence records, by condition_concept_id -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, count_value) -select 1001 as analysis_id, 
- CAST(ce1.condition_CONCEPT_ID AS VARCHAR(255)) as stratum_1, - COUNT_BIG(ce1.PERSON_ID) as count_value -from - @cdm_database_schema.condition_era ce1 -group by ce1.condition_CONCEPT_ID -; ---} - - - ---{1002 IN (@list_of_analysis_ids)}?{ --- 1002 Number of persons by condition occurrence start month, by condition_concept_id -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, stratum_2, count_value) -select 1002 as analysis_id, - CAST(ce1.condition_concept_id AS VARCHAR(255)) as stratum_1, - CAST(YEAR(condition_era_start_date)*100 + month(condition_era_start_date) AS VARCHAR(255)) as stratum_2, - COUNT_BIG(distinct PERSON_ID) as count_value -from -@cdm_database_schema.condition_era ce1 -group by ce1.condition_concept_id, - YEAR(condition_era_start_date)*100 + month(condition_era_start_date) -; ---} - - - ---{1003 IN (@list_of_analysis_ids)}?{ --- 1003 Number of distinct condition era concepts per person -with rawData(count_value) as -( - select COUNT_BIG(distinct ce1.condition_concept_id) as count_value - from @cdm_database_schema.condition_era ce1 - group by ce1.person_id -), -overallStats (avg_value, stdev_value, min_value, max_value, total) as -( - select CAST(avg(1.0 * count_value) AS FLOAT) as avg_value, - CAST(stdev(count_value) AS FLOAT) as stdev_value, - min(count_value) as min_value, - max(count_value) as max_value, - count_big(*) as total - from rawData -), -statsView (count_value, total, rn) as -( - select count_value, - count_big(*) as total, - row_number() over (order by count_value) as rn - FROM rawData - group by count_value -), -priorStats (count_value, total, accumulated) as -( - select s.count_value, s.total, sum(p.total) as accumulated - from statsView s - join statsView p on p.rn <= s.rn - group by s.count_value, s.total, s.rn -) -select 1003 as analysis_id, - o.total as count_value, - o.min_value, - o.max_value, - o.avg_value, - o.stdev_value, - MIN(case when p.accumulated >= .50 * o.total then count_value else o.max_value end) as median_value, - MIN(case when p.accumulated >= .10 * o.total then count_value else o.max_value end) as p10_value, - MIN(case when p.accumulated >= .25 * o.total then count_value else o.max_value end) as p25_value, - MIN(case when p.accumulated >= .75 * o.total then count_value else o.max_value end) as p75_value, - MIN(case when p.accumulated >= .90 * o.total then count_value else o.max_value end) as p90_value -into #tempResults -from priorStats p -CROSS JOIN overallStats o -GROUP BY o.total, o.min_value, o.max_value, o.avg_value, o.stdev_value -; - -insert into @results_database_schema.ACHILLES_results_dist (analysis_id, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value) -select analysis_id, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value -from #tempResults -; - -truncate table #tempResults; -drop table #tempResults; - ---} - - - ---{1004 IN (@list_of_analysis_ids)}?{ --- 1004 Number of persons with at least one condition occurrence, by condition_concept_id by calendar year by gender by age decile -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, stratum_2, stratum_3, stratum_4, count_value) -select 1004 as analysis_id, - CAST(ce1.condition_concept_id AS VARCHAR(255)) as stratum_1, - CAST(YEAR(condition_era_start_date) AS VARCHAR(255)) as stratum_2, - CAST(p1.gender_concept_id AS VARCHAR(255)) as stratum_3, - CAST(floor((year(condition_era_start_date) - 
p1.year_of_birth)/10) AS VARCHAR(255)) as stratum_4, - COUNT_BIG(distinct p1.PERSON_ID) as count_value -from @cdm_database_schema.PERSON p1 -inner join -@cdm_database_schema.condition_era ce1 -on p1.person_id = ce1.person_id -group by ce1.condition_concept_id, - YEAR(condition_era_start_date), - p1.gender_concept_id, - floor((year(condition_era_start_date) - p1.year_of_birth)/10) -; ---} - - - - ---{1006 IN (@list_of_analysis_ids)}?{ --- 1006 Distribution of age by condition_concept_id -select ce.condition_concept_id as subject_id, - p1.gender_concept_id, - ce.condition_start_year - p1.year_of_birth as count_value -INTO #rawData_1006 -from @cdm_database_schema.PERSON p1 -inner join -( - select person_id, condition_concept_id, min(year(condition_era_start_date)) as condition_start_year - from @cdm_database_schema.condition_era - group by person_id, condition_concept_id -) ce on p1.person_id = ce.person_id -; - -with overallStats (stratum1_id, stratum2_id, avg_value, stdev_value, min_value, max_value, total) as -( - select subject_id as stratum1_id, - gender_concept_id as stratum2_id, - CAST(avg(1.0 * count_value) AS FLOAT) as avg_value, - CAST(stdev(count_value) AS FLOAT) as stdev_value, - min(count_value) as min_value, - max(count_value) as max_value, - count_big(*) as total - FROM #rawData_1006 - group by subject_id, gender_concept_id -), -statsView (stratum1_id, stratum2_id, count_value, total, rn) as -( - select subject_id as stratum1_id, gender_concept_id as stratum2_id, count_value, count_big(*) as total, row_number() over (partition by subject_id, gender_concept_id order by count_value) as rn - FROM #rawData_1006 - group by subject_id, gender_concept_id, count_value -), -priorStats (stratum1_id, stratum2_id, count_value, total, accumulated) as -( - select s.stratum1_id, s.stratum2_id, s.count_value, s.total, sum(p.total) as accumulated - from statsView s - join statsView p on s.stratum1_id = p.stratum1_id and s.stratum2_id = p.stratum2_id and p.rn <= s.rn - group by s.stratum1_id, s.stratum2_id, s.count_value, s.total, s.rn -) -select 1006 as analysis_id, - CAST(o.stratum1_id AS VARCHAR(255)) AS stratum1_id, - CAST(o.stratum2_id AS VARCHAR(255)) AS stratum2_id, - o.total as count_value, - o.min_value, - o.max_value, - o.avg_value, - o.stdev_value, - MIN(case when p.accumulated >= .50 * o.total then count_value else o.max_value end) as median_value, - MIN(case when p.accumulated >= .10 * o.total then count_value else o.max_value end) as p10_value, - MIN(case when p.accumulated >= .25 * o.total then count_value else o.max_value end) as p25_value, - MIN(case when p.accumulated >= .75 * o.total then count_value else o.max_value end) as p75_value, - MIN(case when p.accumulated >= .90 * o.total then count_value else o.max_value end) as p90_value -into #tempResults -from priorStats p -join overallStats o on p.stratum1_id = o.stratum1_id and p.stratum2_id = o.stratum2_id -GROUP BY o.stratum1_id, o.stratum2_id, o.total, o.min_value, o.max_value, o.avg_value, o.stdev_value -; - -insert into @results_database_schema.ACHILLES_results_dist (analysis_id, stratum_1, stratum_2, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value) -select analysis_id, stratum1_id, stratum2_id, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value -from #tempResults -; - -truncate table #rawData_1006; -drop table #rawData_1006; - -truncate table #tempResults; -drop table #tempResults; - ---} - 
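Every "Distribution of ..." analysis removed in this hunk (for example 606, 703, 706, 715, 716, 717, 803, 806, 815, 903, 906, 907, 1006 and 1007) follows the same idiom: raw values are staged, an overallStats CTE computes mean, standard deviation, min, max and total count, statsView ranks the distinct values, priorStats accumulates their frequencies, and each percentile is taken as the smallest value whose cumulative frequency reaches the corresponding fraction of the total, with max_value as the fallback. The sketch below isolates that cumulative-count percentile idiom in the same SQL Server style used by the script; my_table(count_value) is a hypothetical input used only for illustration and is not part of the Achilles source.

with rawData (count_value) as
(
  -- hypothetical source of numeric values; in Achilles this is the staged
  -- per-person or per-record measure for the analysis in question
  select count_value
  from my_table
),
overallStats (avg_value, stdev_value, min_value, max_value, total) as
(
  -- one row of summary statistics over all values
  select CAST(avg(1.0 * count_value) AS FLOAT),
    CAST(stdev(count_value) AS FLOAT),
    min(count_value),
    max(count_value),
    count_big(*)
  from rawData
),
statsView (count_value, total, rn) as
(
  -- one row per distinct value: its frequency and its rank in ascending order
  select count_value,
    count_big(*),
    row_number() over (order by count_value)
  from rawData
  group by count_value
),
priorStats (count_value, total, accumulated) as
(
  -- cumulative frequency up to and including each distinct value
  select s.count_value, s.total, sum(p.total)
  from statsView s
  join statsView p on p.rn <= s.rn
  group by s.count_value, s.total, s.rn
)
select o.total as count_value,
  o.min_value,
  o.max_value,
  o.avg_value,
  o.stdev_value,
  -- a percentile is the smallest value whose cumulative frequency reaches the
  -- target fraction of the total; max_value covers rows below that threshold
  MIN(case when p.accumulated >= .50 * o.total then p.count_value else o.max_value end) as median_value,
  MIN(case when p.accumulated >= .25 * o.total then p.count_value else o.max_value end) as p25_value,
  MIN(case when p.accumulated >= .75 * o.total then p.count_value else o.max_value end) as p75_value
from priorStats p
cross join overallStats o
group by o.total, o.min_value, o.max_value, o.avg_value, o.stdev_value
;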
- - ---{1007 IN (@list_of_analysis_ids)}?{ --- 1007 Distribution of condition era length, by condition_concept_id -with rawData(stratum1_id, count_value) as -( - select condition_concept_id as stratum1_id, - datediff(dd,condition_era_start_date, condition_era_end_date) as count_value - from @cdm_database_schema.condition_era ce1 -), -overallStats (stratum1_id, avg_value, stdev_value, min_value, max_value, total) as -( - select stratum1_id, - CAST(avg(1.0 * count_value) AS FLOAT) as avg_value, - CAST(stdev(count_value) AS FLOAT) as stdev_value, - min(count_value) as min_value, - max(count_value) as max_value, - count_big(*) as total - from rawData - group by stratum1_id -), -statsView (stratum1_id, count_value, total, rn) as -( - select stratum1_id, - count_value, - count_big(*) as total, - row_number() over (partition by stratum1_id order by count_value) as rn - FROM rawData - group by stratum1_id, count_value -), -priorStats (stratum1_id, count_value, total, accumulated) as -( - select s.stratum1_id, s.count_value, s.total, sum(p.total) as accumulated - from statsView s - join statsView p on s.stratum1_id = p.stratum1_id and p.rn <= s.rn - group by s.stratum1_id, s.count_value, s.total, s.rn -) -select 1007 as analysis_id, - CAST(p.stratum1_id AS VARCHAR(255)) as stratum_1, - o.total as count_value, - o.min_value, - o.max_value, - o.avg_value, - o.stdev_value, - MIN(case when p.accumulated >= .50 * o.total then count_value else o.max_value end) as median_value, - MIN(case when p.accumulated >= .10 * o.total then count_value else o.max_value end) as p10_value, - MIN(case when p.accumulated >= .25 * o.total then count_value else o.max_value end) as p25_value, - MIN(case when p.accumulated >= .75 * o.total then count_value else o.max_value end) as p75_value, - MIN(case when p.accumulated >= .90 * o.total then count_value else o.max_value end) as p90_value -into #tempResults -from priorStats p -join overallStats o on p.stratum1_id = o.stratum1_id -GROUP BY p.stratum1_id, o.total, o.min_value, o.max_value, o.avg_value, o.stdev_value -; - -insert into @results_database_schema.ACHILLES_results_dist (analysis_id, stratum_1, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value) -select analysis_id, stratum_1, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value -from #tempResults -; - -truncate table #tempResults; -drop table #tempResults; - - ---} - - - ---{1008 IN (@list_of_analysis_ids)}?{ --- 1008 Number of condition eras with invalid person -insert into @results_database_schema.ACHILLES_results (analysis_id, count_value) -select 1008 as analysis_id, - COUNT_BIG(ce1.PERSON_ID) as count_value -from - @cdm_database_schema.condition_era ce1 - left join @cdm_database_schema.PERSON p1 - on p1.person_id = ce1.person_id -where p1.person_id is null -; ---} - - ---{1009 IN (@list_of_analysis_ids)}?{ --- 1009 Number of condition eras outside valid observation period -insert into @results_database_schema.ACHILLES_results (analysis_id, count_value) -select 1009 as analysis_id, - COUNT_BIG(ce1.PERSON_ID) as count_value -from - @cdm_database_schema.condition_era ce1 - left join @cdm_database_schema.observation_period op1 - on op1.person_id = ce1.person_id - and ce1.condition_era_start_date >= op1.observation_period_start_date - and ce1.condition_era_start_date <= op1.observation_period_end_date -where op1.person_id is null -; ---} - - ---{1010 IN (@list_of_analysis_ids)}?{ --- 1010 
Number of condition eras with end date < start date -insert into @results_database_schema.ACHILLES_results (analysis_id, count_value) -select 1010 as analysis_id, - COUNT_BIG(ce1.PERSON_ID) as count_value -from - @cdm_database_schema.condition_era ce1 -where ce1.condition_era_end_date < ce1.condition_era_start_date -; ---} - - ---{1020 IN (@list_of_analysis_ids)}?{ --- 1020 Number of drug era records by drug era start month -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, count_value) -select 1020 as analysis_id, - CAST(YEAR(condition_era_start_date)*100 + month(condition_era_start_date) AS VARCHAR(255)) as stratum_1, - COUNT_BIG(PERSON_ID) as count_value -from -@cdm_database_schema.condition_era ce1 -group by YEAR(condition_era_start_date)*100 + month(condition_era_start_date) -; ---} - - - - -/******************************************** - -ACHILLES Analyses on LOCATION table - -*********************************************/ - ---{1100 IN (@list_of_analysis_ids)}?{ --- 1100 Number of persons by location 3-digit zip -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, count_value) -select 1100 as analysis_id, - CAST(left(l1.zip,3) AS VARCHAR(255)) as stratum_1, COUNT_BIG(distinct person_id) as count_value -from @cdm_database_schema.PERSON p1 - inner join @cdm_database_schema.LOCATION l1 - on p1.location_id = l1.location_id -where p1.location_id is not null - and l1.zip is not null -group by left(l1.zip,3); ---} - - ---{1101 IN (@list_of_analysis_ids)}?{ --- 1101 Number of persons by location state -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, count_value) -select 1101 as analysis_id, - CAST(l1.state AS VARCHAR(255)) as stratum_1, COUNT_BIG(distinct person_id) as count_value -from @cdm_database_schema.PERSON p1 - inner join @cdm_database_schema.LOCATION l1 - on p1.location_id = l1.location_id -where p1.location_id is not null - and l1.state is not null -group by l1.state; ---} - - ---{1102 IN (@list_of_analysis_ids)}?{ --- 1102 Number of care sites by location 3-digit zip -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, count_value) -select 1102 as analysis_id, - CAST(left(l1.zip,3) AS VARCHAR(255)) as stratum_1, COUNT_BIG(distinct care_site_id) as count_value -from @cdm_database_schema.care_site cs1 - inner join @cdm_database_schema.LOCATION l1 - on cs1.location_id = l1.location_id -where cs1.location_id is not null - and l1.zip is not null -group by left(l1.zip,3); ---} - - ---{1103 IN (@list_of_analysis_ids)}?{ --- 1103 Number of care sites by location state -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, count_value) -select 1103 as analysis_id, - CAST(l1.state AS VARCHAR(255)) as stratum_1, COUNT_BIG(distinct care_site_id) as count_value -from @cdm_database_schema.care_site cs1 - inner join @cdm_database_schema.LOCATION l1 - on cs1.location_id = l1.location_id -where cs1.location_id is not null - and l1.state is not null -group by l1.state; ---} - - -/******************************************** - -ACHILLES Analyses on CARE_SITE table - -*********************************************/ - - ---{1200 IN (@list_of_analysis_ids)}?{ --- 1200 Number of persons by place of service -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, count_value) -select 1200 as analysis_id, - CAST(cs1.place_of_service_concept_id AS VARCHAR(255)) as stratum_1, COUNT_BIG(person_id) as count_value -from @cdm_database_schema.PERSON p1 
- inner join @cdm_database_schema.care_site cs1 - on p1.care_site_id = cs1.care_site_id -where p1.care_site_id is not null - and cs1.place_of_service_concept_id is not null -group by cs1.place_of_service_concept_id; ---} - - ---{1201 IN (@list_of_analysis_ids)}?{ --- 1201 Number of visits by place of service -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, count_value) -select 1201 as analysis_id, - CAST(cs1.place_of_service_concept_id AS VARCHAR(255)) as stratum_1, COUNT_BIG(visit_occurrence_id) as count_value -from @cdm_database_schema.visit_occurrence vo1 - inner join @cdm_database_schema.care_site cs1 - on vo1.care_site_id = cs1.care_site_id -where vo1.care_site_id is not null - and cs1.place_of_service_concept_id is not null -group by cs1.place_of_service_concept_id; ---} - - ---{1202 IN (@list_of_analysis_ids)}?{ --- 1202 Number of care sites by place of service -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, count_value) -select 1202 as analysis_id, - CAST(cs1.place_of_service_concept_id AS VARCHAR(255)) as stratum_1, - COUNT_BIG(care_site_id) as count_value -from @cdm_database_schema.care_site cs1 -where cs1.place_of_service_concept_id is not null -group by cs1.place_of_service_concept_id; ---} - - -/******************************************** - -ACHILLES Analyses on ORGANIZATION table - -*********************************************/ - ---{1300 IN (@list_of_analysis_ids)}?{ --- 1300 Number of organizations by place of service - ---NOT APPLICABLE IN CDMv5 - ---} - - - - - -/******************************************** - -ACHILLES Analyses on PAYOR_PLAN_PERIOD table - -*********************************************/ - - ---{1406 IN (@list_of_analysis_ids)}?{ --- 1406 Length of payer plan (days) of first payer plan period by gender -with rawData(stratum1_id, count_value) as -( - select p1.gender_concept_id as stratum1_id, - DATEDIFF(dd,ppp1.payer_plan_period_start_date, ppp1.payer_plan_period_end_date) as count_value - from @cdm_database_schema.PERSON p1 - inner join - (select person_id, - payer_plan_period_START_DATE, - payer_plan_period_END_DATE, - ROW_NUMBER() over (PARTITION by person_id order by payer_plan_period_start_date asc) as rn1 - from @cdm_database_schema.payer_plan_period - ) ppp1 - on p1.PERSON_ID = ppp1.PERSON_ID - where ppp1.rn1 = 1 -), -overallStats (stratum1_id, avg_value, stdev_value, min_value, max_value, total) as -( - select stratum1_id, - CAST(avg(1.0 * count_value) AS FLOAT) as avg_value, - CAST(stdev(count_value) AS FLOAT) as stdev_value, - min(count_value) as min_value, - max(count_value) as max_value, - count_big(*) as total - from rawData - group by stratum1_id -), -statsView (stratum1_id, count_value, total, rn) as -( - select stratum1_id, - count_value, - count_big(*) as total, - row_number() over (partition by stratum1_id order by count_value) as rn - FROM rawData - group by stratum1_id, count_value -), -priorStats (stratum1_id, count_value, total, accumulated) as -( - select s.stratum1_id, s.count_value, s.total, sum(p.total) as accumulated - from statsView s - join statsView p on s.stratum1_id = p.stratum1_id and p.rn <= s.rn - group by s.stratum1_id, s.count_value, s.total, s.rn -) -select 1406 as analysis_id, - CAST(p.stratum1_id AS VARCHAR(255)) as stratum_1, - o.total as count_value, - o.min_value, - o.max_value, - o.avg_value, - o.stdev_value, - MIN(case when p.accumulated >= .50 * o.total then count_value else o.max_value end) as median_value, - MIN(case when p.accumulated >= .10 * 
o.total then count_value else o.max_value end) as p10_value, - MIN(case when p.accumulated >= .25 * o.total then count_value else o.max_value end) as p25_value, - MIN(case when p.accumulated >= .75 * o.total then count_value else o.max_value end) as p75_value, - MIN(case when p.accumulated >= .90 * o.total then count_value else o.max_value end) as p90_value -into #tempResults -from priorStats p -join overallStats o on p.stratum1_id = o.stratum1_id -GROUP BY p.stratum1_id, o.total, o.min_value, o.max_value, o.avg_value, o.stdev_value -; - -insert into @results_database_schema.ACHILLES_results_dist (analysis_id, stratum_1, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value) -select analysis_id, stratum_1, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value -from #tempResults -; - -truncate table #tempResults; -drop table #tempResults; - - ---} - - - ---{1407 IN (@list_of_analysis_ids)}?{ --- 1407 Length of payer plan (days) of first payer plan period by age decile -with rawData(stratum_id, count_value) as -( - select floor((year(ppp1.payer_plan_period_START_DATE) - p1.YEAR_OF_BIRTH)/10) as stratum_id, - DATEDIFF(dd,ppp1.payer_plan_period_start_date, ppp1.payer_plan_period_end_date) as count_value - from @cdm_database_schema.PERSON p1 - inner join - (select person_id, - payer_plan_period_START_DATE, - payer_plan_period_END_DATE, - ROW_NUMBER() over (PARTITION by person_id order by payer_plan_period_start_date asc) as rn1 - from @cdm_database_schema.payer_plan_period - ) ppp1 - on p1.PERSON_ID = ppp1.PERSON_ID - where ppp1.rn1 = 1 -), -overallStats (stratum_id, avg_value, stdev_value, min_value, max_value, total) as -( - select stratum_id, - CAST(avg(1.0 * count_value) AS FLOAT) as avg_value, - CAST(stdev(count_value) AS FLOAT) as stdev_value, - min(count_value) as min_value, - max(count_value) as max_value, - count_big(*) as total - FROM rawData - group by stratum_id -), -statsView (stratum_id, count_value, total, rn) as -( - select stratum_id, count_value, count_big(*) as total, row_number() over (order by count_value) as rn - FROM rawData - group by stratum_id, count_value -), -priorStats (stratum_id, count_value, total, accumulated) as -( - select s.stratum_id, s.count_value, s.total, sum(p.total) as accumulated - from statsView s - join statsView p on s.stratum_id = p.stratum_id and p.rn <= s.rn - group by s.stratum_id, s.count_value, s.total, s.rn -) -select 1407 as analysis_id, - CAST(o.stratum_id AS VARCHAR(255)) AS stratum_id, - o.total as count_value, - o.min_value, - o.max_value, - o.avg_value, - o.stdev_value, - MIN(case when p.accumulated >= .50 * o.total then count_value else o.max_value end) as median_value, - MIN(case when p.accumulated >= .10 * o.total then count_value else o.max_value end) as p10_value, - MIN(case when p.accumulated >= .25 * o.total then count_value else o.max_value end) as p25_value, - MIN(case when p.accumulated >= .75 * o.total then count_value else o.max_value end) as p75_value, - MIN(case when p.accumulated >= .90 * o.total then count_value else o.max_value end) as p90_value -into #tempResults -from priorStats p -join overallStats o on p.stratum_id = o.stratum_id -GROUP BY o.stratum_id, o.total, o.min_value, o.max_value, o.avg_value, o.stdev_value -; - -insert into @results_database_schema.ACHILLES_results_dist (analysis_id, stratum_1, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, 
p25_value, p75_value, p90_value) -select analysis_id, stratum_id, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value -from #tempResults -; - -truncate table #tempResults; -drop table #tempResults; - - ---} - - - - ---{1408 IN (@list_of_analysis_ids)}?{ --- 1408 Number of persons by length of payer plan period, in 30d increments -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, count_value) -select 1408 as analysis_id, - CAST(floor(DATEDIFF(dd, ppp1.payer_plan_period_start_date, ppp1.payer_plan_period_end_date)/30) AS VARCHAR(255)) as stratum_1, - COUNT_BIG(distinct p1.person_id) as count_value -from @cdm_database_schema.PERSON p1 - inner join - (select person_id, - payer_plan_period_START_DATE, - payer_plan_period_END_DATE, - ROW_NUMBER() over (PARTITION by person_id order by payer_plan_period_start_date asc) as rn1 - from @cdm_database_schema.payer_plan_period - ) ppp1 - on p1.PERSON_ID = ppp1.PERSON_ID - where ppp1.rn1 = 1 -group by CAST(floor(DATEDIFF(dd, ppp1.payer_plan_period_start_date, ppp1.payer_plan_period_end_date)/30) AS VARCHAR(255)) -; ---} - - ---{1409 IN (@list_of_analysis_ids)}?{ --- 1409 Number of persons with continuous payer plan in each year --- Note: using temp table instead of nested query because this gives vastly improved - -IF OBJECT_ID('tempdb..#temp_dates', 'U') IS NOT NULL - DROP TABLE #temp_dates; - -select distinct - YEAR(payer_plan_period_start_date) as obs_year -INTO - #temp_dates -from - @cdm_database_schema.payer_plan_period -; - -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, count_value) -select 1409 as analysis_id, - CAST(t1.obs_year AS VARCHAR(255)) as stratum_1, COUNT_BIG(distinct p1.PERSON_ID) as count_value -from - @cdm_database_schema.PERSON p1 - inner join - @cdm_database_schema.payer_plan_period ppp1 - on p1.person_id = ppp1.person_id - , - #temp_dates t1 -where year(ppp1.payer_plan_period_START_DATE) <= t1.obs_year - and year(ppp1.payer_plan_period_END_DATE) >= t1.obs_year -group by t1.obs_year -; - -truncate table #temp_dates; -drop table #temp_dates; ---} - - ---{1410 IN (@list_of_analysis_ids)}?{ --- 1410 Number of persons with continuous payer plan in each month --- Note: using temp table instead of nested query because this gives vastly improved performance in Oracle - -IF OBJECT_ID('tempdb..#temp_dates', 'U') IS NOT NULL - DROP TABLE #temp_dates; - -SELECT DISTINCT - YEAR(payer_plan_period_start_date)*100 + MONTH(payer_plan_period_start_date) AS obs_month, - DATEFROMPARTS(YEAR(payer_plan_period_start_date), MONTH(payer_plan_period_start_date), 1) AS obs_month_start, - EOMONTH(payer_plan_period_start_date) AS obs_month_end -INTO - #temp_dates -FROM - @cdm_database_schema.payer_plan_period -; - -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, count_value) -select - 1410 as analysis_id, - CAST(obs_month AS VARCHAR(255)) as stratum_1, - COUNT_BIG(distinct p1.PERSON_ID) as count_value -from - @cdm_database_schema.PERSON p1 - inner join - @cdm_database_schema.payer_plan_period ppp1 - on p1.person_id = ppp1.person_id - , - #temp_dates -where ppp1.payer_plan_period_START_DATE <= obs_month_start - and ppp1.payer_plan_period_END_DATE >= obs_month_end -group by obs_month -; - -TRUNCATE TABLE #temp_dates; -DROP TABLE #temp_dates; ---} - - - ---{1411 IN (@list_of_analysis_ids)}?{ --- 1411 Number of persons by payer plan period start month -insert into @results_database_schema.ACHILLES_results 
(analysis_id, stratum_1, count_value) -select 1411 as analysis_id, - CAST(DATEFROMPARTS(YEAR(payer_plan_period_start_date), MONTH(payer_plan_period_START_DATE), 1) AS VARCHAR(255)) AS stratum_1, - COUNT_BIG(distinct p1.PERSON_ID) as count_value -from - @cdm_database_schema.PERSON p1 - inner join @cdm_database_schema.payer_plan_period ppp1 - on p1.person_id = ppp1.person_id -group by DATEFROMPARTS(YEAR(payer_plan_period_start_date), MONTH(payer_plan_period_START_DATE), 1) -; ---} - - - ---{1412 IN (@list_of_analysis_ids)}?{ --- 1412 Number of persons by payer plan period end month -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, count_value) -select 1412 as analysis_id, - CAST(DATEFROMPARTS(YEAR(payer_plan_period_start_date), MONTH(payer_plan_period_START_DATE), 1) AS VARCHAR(255)) AS stratum_1, - COUNT_BIG(distinct p1.PERSON_ID) as count_value -from - @cdm_database_schema.PERSON p1 - inner join @cdm_database_schema.payer_plan_period ppp1 - on p1.person_id = ppp1.person_id -group by DATEFROMPARTS(YEAR(payer_plan_period_start_date), MONTH(payer_plan_period_START_DATE), 1) -; ---} - - ---{1413 IN (@list_of_analysis_ids)}?{ --- 1413 Number of persons by number of payer plan periods -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, count_value) -select 1413 as analysis_id, - CAST(ppp1.num_periods AS VARCHAR(255)) as stratum_1, - COUNT_BIG(distinct p1.PERSON_ID) as count_value -from - @cdm_database_schema.PERSON p1 - inner join (select person_id, COUNT_BIG(payer_plan_period_start_date) as num_periods from @cdm_database_schema.payer_plan_period group by PERSON_ID) ppp1 - on p1.person_id = ppp1.person_id -group by ppp1.num_periods -; ---} - ---{1414 IN (@list_of_analysis_ids)}?{ --- 1414 Number of persons with payer plan period before year-of-birth -insert into @results_database_schema.ACHILLES_results (analysis_id, count_value) -select 1414 as analysis_id, - COUNT_BIG(distinct p1.PERSON_ID) as count_value -from - @cdm_database_schema.PERSON p1 - inner join (select person_id, MIN(year(payer_plan_period_start_date)) as first_obs_year from @cdm_database_schema.payer_plan_period group by PERSON_ID) ppp1 - on p1.person_id = ppp1.person_id -where p1.year_of_birth > ppp1.first_obs_year -; ---} - ---{1415 IN (@list_of_analysis_ids)}?{ --- 1415 Number of persons with payer plan period end < start -insert into @results_database_schema.ACHILLES_results (analysis_id, count_value) -select 1415 as analysis_id, - COUNT_BIG(ppp1.PERSON_ID) as count_value -from - @cdm_database_schema.payer_plan_period ppp1 -where ppp1.payer_plan_period_end_date < ppp1.payer_plan_period_start_date -; ---} - -{ @runCostAnalysis }?{ - -/******************************************** - -ACHILLES Analyses on DRUG_COST table - -*********************************************/ - --- for performance optimization, we create a table with drug costs pre-cached for the 15XX analysis - - --- {1502 in (@list_of_analysis_ids) | 1503 in (@list_of_analysis_ids) | 1504 in (@list_of_analysis_ids) | 1505 in (@list_of_analysis_ids) | 1506 in (@list_of_analysis_ids) | 1507 in (@list_of_analysis_ids) | 1508 in (@list_of_analysis_ids) | 1509 in (@list_of_analysis_ids) | 1510 in (@list_of_analysis_ids) | 1511 in (@list_of_analysis_ids)}?{ - -IF OBJECT_ID('@results_database_schema.ACHILLES_drug_cost_raw', 'U') IS NOT NULL - DROP TABLE @results_database_schema.ACHILLES_drug_cost_raw; - -select drug_concept_id as subject_id, - paid_copay, - paid_coinsurance, - paid_toward_deductible, - paid_by_payer, 
- paid_by_coordination_benefits, - total_out_of_pocket, - total_paid, - ingredient_cost, - dispensing_fee, - average_wholesale_price -INTO @results_database_schema.ACHILLES_drug_cost_raw -from @cdm_database_schema.drug_cost dc1 -join @cdm_database_schema.drug_exposure de1 on de1.drug_exposure_id = dc1.drug_exposure_id and drug_concept_id <> 0 -; ---} - - - ---{1500 IN (@list_of_analysis_ids)}?{ --- 1500 Number of drug cost records with invalid drug exposure id -insert into @results_database_schema.ACHILLES_results (analysis_id, count_value) -select 1500 as analysis_id, - COUNT_BIG(dc1.drug_cost_ID) as count_value -from - @cdm_database_schema.drug_cost dc1 - left join @cdm_database_schema.drug_exposure de1 - on dc1.drug_exposure_id = de1.drug_exposure_id -where de1.drug_exposure_id is null -; ---} - ---{1501 IN (@list_of_analysis_ids)}?{ --- 1501 Number of drug cost records with invalid payer plan period id -insert into @results_database_schema.ACHILLES_results (analysis_id, count_value) -select 1501 as analysis_id, - COUNT_BIG(dc1.drug_cost_ID) as count_value -from - @cdm_database_schema.drug_cost dc1 - left join @cdm_database_schema.payer_plan_period ppp1 - on dc1.payer_plan_period_id = ppp1.payer_plan_period_id -where dc1.payer_plan_period_id is not null - and ppp1.payer_plan_period_id is null -; ---} - - ---{1502 IN (@list_of_analysis_ids)}?{ --- 1502 Distribution of paid copay, by drug_concept_id -with rawData(stratum1_id, count_value) as -( - select subject_id as stratum1_id, - paid_copay as count_value - from @results_database_schema.ACHILLES_drug_cost_raw - where paid_copay is not null -), -overallStats (stratum1_id, avg_value, stdev_value, min_value, max_value, total) as -( - select stratum1_id, - CAST(avg(1.0 * count_value) AS FLOAT) as avg_value, - CAST(stdev(count_value) AS FLOAT) as stdev_value, - min(count_value) as min_value, - max(count_value) as max_value, - count_big(*) as total - from rawData - group by stratum1_id -), -statsView (stratum1_id, count_value, total, rn) as -( - select stratum1_id, - count_value, - count_big(*) as total, - row_number() over (partition by stratum1_id order by count_value) as rn - FROM rawData - group by stratum1_id, count_value -), -priorStats (stratum1_id, count_value, total, accumulated) as -( - select s.stratum1_id, s.count_value, s.total, sum(p.total) as accumulated - from statsView s - join statsView p on s.stratum1_id = p.stratum1_id and p.rn <= s.rn - group by s.stratum1_id, s.count_value, s.total, s.rn -) -select 1502 as analysis_id, - CAST(p.stratum1_id AS VARCHAR(255)) as stratum_1, - o.total as count_value, - o.min_value, - o.max_value, - o.avg_value, - o.stdev_value, - MIN(case when p.accumulated >= .50 * o.total then count_value else o.max_value end) as median_value, - MIN(case when p.accumulated >= .10 * o.total then count_value else o.max_value end) as p10_value, - MIN(case when p.accumulated >= .25 * o.total then count_value else o.max_value end) as p25_value, - MIN(case when p.accumulated >= .75 * o.total then count_value else o.max_value end) as p75_value, - MIN(case when p.accumulated >= .90 * o.total then count_value else o.max_value end) as p90_value -into #tempResults -from priorStats p -join overallStats o on p.stratum1_id = o.stratum1_id -GROUP BY p.stratum1_id, o.total, o.min_value, o.max_value, o.avg_value, o.stdev_value -; - -insert into @results_database_schema.ACHILLES_results_dist (analysis_id, stratum_1, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, 
p90_value) -select analysis_id, stratum_1, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value -from #tempResults -; - -truncate table #tempResults; -drop table #tempResults; - - ---} - - ---{1503 IN (@list_of_analysis_ids)}?{ --- 1503 Distribution of paid coinsurance, by drug_concept_id -with rawData(stratum1_id, count_value) as -( - select subject_id as stratum1_id, - paid_coinsurance as count_value - from @results_database_schema.ACHILLES_drug_cost_raw - where paid_coinsurance is not null -), -overallStats (stratum1_id, avg_value, stdev_value, min_value, max_value, total) as -( - select stratum1_id, - CAST(avg(1.0 * count_value) AS FLOAT) as avg_value, - CAST(stdev(count_value) AS FLOAT) as stdev_value, - min(count_value) as min_value, - max(count_value) as max_value, - count_big(*) as total - from rawData - group by stratum1_id -), -statsView (stratum1_id, count_value, total, rn) as -( - select stratum1_id, - count_value, - count_big(*) as total, - row_number() over (partition by stratum1_id order by count_value) as rn - FROM rawData - group by stratum1_id, count_value -), -priorStats (stratum1_id, count_value, total, accumulated) as -( - select s.stratum1_id, s.count_value, s.total, sum(p.total) as accumulated - from statsView s - join statsView p on s.stratum1_id = p.stratum1_id and p.rn <= s.rn - group by s.stratum1_id, s.count_value, s.total, s.rn -) -select 1503 as analysis_id, - CAST(p.stratum1_id AS VARCHAR(255)) as stratum_1, - o.total as count_value, - o.min_value, - o.max_value, - o.avg_value, - o.stdev_value, - MIN(case when p.accumulated >= .50 * o.total then count_value else o.max_value end) as median_value, - MIN(case when p.accumulated >= .10 * o.total then count_value else o.max_value end) as p10_value, - MIN(case when p.accumulated >= .25 * o.total then count_value else o.max_value end) as p25_value, - MIN(case when p.accumulated >= .75 * o.total then count_value else o.max_value end) as p75_value, - MIN(case when p.accumulated >= .90 * o.total then count_value else o.max_value end) as p90_value -into #tempResults -from priorStats p -join overallStats o on p.stratum1_id = o.stratum1_id -GROUP BY p.stratum1_id, o.total, o.min_value, o.max_value, o.avg_value, o.stdev_value -; - -insert into @results_database_schema.ACHILLES_results_dist (analysis_id, stratum_1, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value) -select analysis_id, stratum_1, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value -from #tempResults -; - -truncate table #tempResults; -drop table #tempResults; - - ---} - ---{1504 IN (@list_of_analysis_ids)}?{ --- 1504 Distribution of paid toward deductible, by drug_concept_id -with rawData(stratum1_id, count_value) as -( - select subject_id as stratum1_id, - paid_toward_deductible as count_value - from @results_database_schema.ACHILLES_drug_cost_raw - where paid_toward_deductible is not null -), -overallStats (stratum1_id, avg_value, stdev_value, min_value, max_value, total) as -( - select stratum1_id, - CAST(avg(1.0 * count_value) AS FLOAT) as avg_value, - CAST(stdev(count_value) AS FLOAT) as stdev_value, - min(count_value) as min_value, - max(count_value) as max_value, - count_big(*) as total - from rawData - group by stratum1_id -), -statsView (stratum1_id, count_value, total, rn) as -( - select stratum1_id, - count_value, - count_big(*) as total, - row_number() 
over (partition by stratum1_id order by count_value) as rn - FROM rawData - group by stratum1_id, count_value -), -priorStats (stratum1_id, count_value, total, accumulated) as -( - select s.stratum1_id, s.count_value, s.total, sum(p.total) as accumulated - from statsView s - join statsView p on s.stratum1_id = p.stratum1_id and p.rn <= s.rn - group by s.stratum1_id, s.count_value, s.total, s.rn -) -select 1504 as analysis_id, - CAST(p.stratum1_id AS VARCHAR(255)) as stratum_1, - o.total as count_value, - o.min_value, - o.max_value, - o.avg_value, - o.stdev_value, - MIN(case when p.accumulated >= .50 * o.total then count_value else o.max_value end) as median_value, - MIN(case when p.accumulated >= .10 * o.total then count_value else o.max_value end) as p10_value, - MIN(case when p.accumulated >= .25 * o.total then count_value else o.max_value end) as p25_value, - MIN(case when p.accumulated >= .75 * o.total then count_value else o.max_value end) as p75_value, - MIN(case when p.accumulated >= .90 * o.total then count_value else o.max_value end) as p90_value -into #tempResults -from priorStats p -join overallStats o on p.stratum1_id = o.stratum1_id -GROUP BY p.stratum1_id, o.total, o.min_value, o.max_value, o.avg_value, o.stdev_value -; - -insert into @results_database_schema.ACHILLES_results_dist (analysis_id, stratum_1, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value) -select analysis_id, stratum_1, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value -from #tempResults -; - -truncate table #tempResults; -drop table #tempResults; - - ---} - ---{1505 IN (@list_of_analysis_ids)}?{ --- 1505 Distribution of paid by payer, by drug_concept_id -with rawData(stratum1_id, count_value) as -( - select subject_id as stratum1_id, - paid_by_payer as count_value - from @results_database_schema.ACHILLES_drug_cost_raw - where paid_by_payer is not null -), -overallStats (stratum1_id, avg_value, stdev_value, min_value, max_value, total) as -( - select stratum1_id, - CAST(avg(1.0 * count_value) AS FLOAT) as avg_value, - CAST(stdev(count_value) AS FLOAT) as stdev_value, - min(count_value) as min_value, - max(count_value) as max_value, - count_big(*) as total - from rawData - group by stratum1_id -), -statsView (stratum1_id, count_value, total, rn) as -( - select stratum1_id, - count_value, - count_big(*) as total, - row_number() over (partition by stratum1_id order by count_value) as rn - FROM rawData - group by stratum1_id, count_value -), -priorStats (stratum1_id, count_value, total, accumulated) as -( - select s.stratum1_id, s.count_value, s.total, sum(p.total) as accumulated - from statsView s - join statsView p on s.stratum1_id = p.stratum1_id and p.rn <= s.rn - group by s.stratum1_id, s.count_value, s.total, s.rn -) -select 1505 as analysis_id, - CAST(p.stratum1_id AS VARCHAR(255)) as stratum_1, - o.total as count_value, - o.min_value, - o.max_value, - o.avg_value, - o.stdev_value, - MIN(case when p.accumulated >= .50 * o.total then count_value else o.max_value end) as median_value, - MIN(case when p.accumulated >= .10 * o.total then count_value else o.max_value end) as p10_value, - MIN(case when p.accumulated >= .25 * o.total then count_value else o.max_value end) as p25_value, - MIN(case when p.accumulated >= .75 * o.total then count_value else o.max_value end) as p75_value, - MIN(case when p.accumulated >= .90 * o.total then count_value else o.max_value end) as p90_value 
-into #tempResults -from priorStats p -join overallStats o on p.stratum1_id = o.stratum1_id -GROUP BY p.stratum1_id, o.total, o.min_value, o.max_value, o.avg_value, o.stdev_value -; - -insert into @results_database_schema.ACHILLES_results_dist (analysis_id, stratum_1, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value) -select analysis_id, stratum_1, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value -from #tempResults -; - -truncate table #tempResults; -drop table #tempResults; - - ---} - ---{1506 IN (@list_of_analysis_ids)}?{ --- 1506 Distribution of paid by coordination of benefit, by drug_concept_id -with rawData(stratum1_id, count_value) as -( - select subject_id as stratum1_id, - paid_by_coordination_benefits as count_value - from @results_database_schema.ACHILLES_drug_cost_raw - where paid_by_coordination_benefits is not null -), -overallStats (stratum1_id, avg_value, stdev_value, min_value, max_value, total) as -( - select stratum1_id, - CAST(avg(1.0 * count_value) AS FLOAT) as avg_value, - CAST(stdev(count_value) AS FLOAT) as stdev_value, - min(count_value) as min_value, - max(count_value) as max_value, - count_big(*) as total - from rawData - group by stratum1_id -), -statsView (stratum1_id, count_value, total, rn) as -( - select stratum1_id, - count_value, - count_big(*) as total, - row_number() over (partition by stratum1_id order by count_value) as rn - FROM rawData - group by stratum1_id, count_value -), -priorStats (stratum1_id, count_value, total, accumulated) as -( - select s.stratum1_id, s.count_value, s.total, sum(p.total) as accumulated - from statsView s - join statsView p on s.stratum1_id = p.stratum1_id and p.rn <= s.rn - group by s.stratum1_id, s.count_value, s.total, s.rn -) -select 1506 as analysis_id, - CAST(p.stratum1_id AS VARCHAR(255)) as stratum_1, - o.total as count_value, - o.min_value, - o.max_value, - o.avg_value, - o.stdev_value, - MIN(case when p.accumulated >= .50 * o.total then count_value else o.max_value end) as median_value, - MIN(case when p.accumulated >= .10 * o.total then count_value else o.max_value end) as p10_value, - MIN(case when p.accumulated >= .25 * o.total then count_value else o.max_value end) as p25_value, - MIN(case when p.accumulated >= .75 * o.total then count_value else o.max_value end) as p75_value, - MIN(case when p.accumulated >= .90 * o.total then count_value else o.max_value end) as p90_value -into #tempResults -from priorStats p -join overallStats o on p.stratum1_id = o.stratum1_id -GROUP BY p.stratum1_id, o.total, o.min_value, o.max_value, o.avg_value, o.stdev_value -; - -insert into @results_database_schema.ACHILLES_results_dist (analysis_id, stratum_1, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value) -select analysis_id, stratum_1, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value -from #tempResults -; - -truncate table #tempResults; -drop table #tempResults; - ---} - ---{1507 IN (@list_of_analysis_ids)}?{ --- 1507 Distribution of total out-of-pocket, by drug_concept_id -with rawData(stratum1_id, count_value) as -( - select subject_id as stratum1_id, - total_out_of_pocket as count_value - from @results_database_schema.ACHILLES_drug_cost_raw - where total_out_of_pocket is not null -), -overallStats (stratum1_id, avg_value, stdev_value, min_value, max_value, 
total) as -( - select stratum1_id, - CAST(avg(1.0 * count_value) AS FLOAT) as avg_value, - CAST(stdev(count_value) AS FLOAT) as stdev_value, - min(count_value) as min_value, - max(count_value) as max_value, - count_big(*) as total - from rawData - group by stratum1_id -), -statsView (stratum1_id, count_value, total, rn) as -( - select stratum1_id, - count_value, - count_big(*) as total, - row_number() over (partition by stratum1_id order by count_value) as rn - FROM rawData - group by stratum1_id, count_value -), -priorStats (stratum1_id, count_value, total, accumulated) as -( - select s.stratum1_id, s.count_value, s.total, sum(p.total) as accumulated - from statsView s - join statsView p on s.stratum1_id = p.stratum1_id and p.rn <= s.rn - group by s.stratum1_id, s.count_value, s.total, s.rn -) -select 1507 as analysis_id, - CAST(p.stratum1_id AS VARCHAR(255)) as stratum_1, - o.total as count_value, - o.min_value, - o.max_value, - o.avg_value, - o.stdev_value, - MIN(case when p.accumulated >= .50 * o.total then count_value else o.max_value end) as median_value, - MIN(case when p.accumulated >= .10 * o.total then count_value else o.max_value end) as p10_value, - MIN(case when p.accumulated >= .25 * o.total then count_value else o.max_value end) as p25_value, - MIN(case when p.accumulated >= .75 * o.total then count_value else o.max_value end) as p75_value, - MIN(case when p.accumulated >= .90 * o.total then count_value else o.max_value end) as p90_value -into #tempResults -from priorStats p -join overallStats o on p.stratum1_id = o.stratum1_id -GROUP BY p.stratum1_id, o.total, o.min_value, o.max_value, o.avg_value, o.stdev_value -; - -insert into @results_database_schema.ACHILLES_results_dist (analysis_id, stratum_1, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value) -select analysis_id, stratum_1, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value -from #tempResults -; - -truncate table #tempResults; -drop table #tempResults; - ---} - - ---{1508 IN (@list_of_analysis_ids)}?{ --- 1508 Distribution of total paid, by drug_concept_id -with rawData(stratum1_id, count_value) as -( - select subject_id as stratum1_id, - total_paid as count_value - from @results_database_schema.ACHILLES_drug_cost_raw - where total_paid is not null -), -overallStats (stratum1_id, avg_value, stdev_value, min_value, max_value, total) as -( - select stratum1_id, - CAST(avg(1.0 * count_value) AS FLOAT) as avg_value, - CAST(stdev(count_value) AS FLOAT) as stdev_value, - min(count_value) as min_value, - max(count_value) as max_value, - count_big(*) as total - from rawData - group by stratum1_id -), -statsView (stratum1_id, count_value, total, rn) as -( - select stratum1_id, - count_value, - count_big(*) as total, - row_number() over (partition by stratum1_id order by count_value) as rn - FROM rawData - group by stratum1_id, count_value -), -priorStats (stratum1_id, count_value, total, accumulated) as -( - select s.stratum1_id, s.count_value, s.total, sum(p.total) as accumulated - from statsView s - join statsView p on s.stratum1_id = p.stratum1_id and p.rn <= s.rn - group by s.stratum1_id, s.count_value, s.total, s.rn -) -select 1508 as analysis_id, - CAST(p.stratum1_id AS VARCHAR(255)) as stratum_1, - o.total as count_value, - o.min_value, - o.max_value, - o.avg_value, - o.stdev_value, - MIN(case when p.accumulated >= .50 * o.total then count_value else o.max_value end) as median_value, - 
MIN(case when p.accumulated >= .10 * o.total then count_value else o.max_value end) as p10_value, - MIN(case when p.accumulated >= .25 * o.total then count_value else o.max_value end) as p25_value, - MIN(case when p.accumulated >= .75 * o.total then count_value else o.max_value end) as p75_value, - MIN(case when p.accumulated >= .90 * o.total then count_value else o.max_value end) as p90_value -into #tempResults -from priorStats p -join overallStats o on p.stratum1_id = o.stratum1_id -GROUP BY p.stratum1_id, o.total, o.min_value, o.max_value, o.avg_value, o.stdev_value -; - -insert into @results_database_schema.ACHILLES_results_dist (analysis_id, stratum_1, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value) -select analysis_id, stratum_1, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value -from #tempResults -; - -truncate table #tempResults; -drop table #tempResults; - ---} - - ---{1509 IN (@list_of_analysis_ids)}?{ --- 1509 Distribution of ingredient_cost, by drug_concept_id -with rawData(stratum1_id, count_value) as -( - select subject_id as stratum1_id, - ingredient_cost as count_value - from @results_database_schema.ACHILLES_drug_cost_raw - where ingredient_cost is not null -), -overallStats (stratum1_id, avg_value, stdev_value, min_value, max_value, total) as -( - select stratum1_id, - CAST(avg(1.0 * count_value) AS FLOAT) as avg_value, - CAST(stdev(count_value) AS FLOAT) as stdev_value, - min(count_value) as min_value, - max(count_value) as max_value, - count_big(*) as total - from rawData - group by stratum1_id -), -statsView (stratum1_id, count_value, total, rn) as -( - select stratum1_id, - count_value, - count_big(*) as total, - row_number() over (partition by stratum1_id order by count_value) as rn - FROM rawData - group by stratum1_id, count_value -), -priorStats (stratum1_id, count_value, total, accumulated) as -( - select s.stratum1_id, s.count_value, s.total, sum(p.total) as accumulated - from statsView s - join statsView p on s.stratum1_id = p.stratum1_id and p.rn <= s.rn - group by s.stratum1_id, s.count_value, s.total, s.rn -) -select 1509 as analysis_id, - CAST(p.stratum1_id AS VARCHAR(255)) as stratum_1, - o.total as count_value, - o.min_value, - o.max_value, - o.avg_value, - o.stdev_value, - MIN(case when p.accumulated >= .50 * o.total then count_value else o.max_value end) as median_value, - MIN(case when p.accumulated >= .10 * o.total then count_value else o.max_value end) as p10_value, - MIN(case when p.accumulated >= .25 * o.total then count_value else o.max_value end) as p25_value, - MIN(case when p.accumulated >= .75 * o.total then count_value else o.max_value end) as p75_value, - MIN(case when p.accumulated >= .90 * o.total then count_value else o.max_value end) as p90_value -into #tempResults -from priorStats p -join overallStats o on p.stratum1_id = o.stratum1_id -GROUP BY p.stratum1_id, o.total, o.min_value, o.max_value, o.avg_value, o.stdev_value -; - -insert into @results_database_schema.ACHILLES_results_dist (analysis_id, stratum_1, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value) -select analysis_id, stratum_1, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value -from #tempResults -; - -truncate table #tempResults; -drop table #tempResults; - ---} - ---{1510 IN (@list_of_analysis_ids)}?{ --- 1510 
Distribution of dispensing fee, by drug_concept_id -with rawData(stratum1_id, count_value) as -( - select subject_id as stratum1_id, - dispensing_fee as count_value - from @results_database_schema.ACHILLES_drug_cost_raw - where dispensing_fee is not null -), -overallStats (stratum1_id, avg_value, stdev_value, min_value, max_value, total) as -( - select stratum1_id, - CAST(avg(1.0 * count_value) AS FLOAT) as avg_value, - CAST(stdev(count_value) AS FLOAT) as stdev_value, - min(count_value) as min_value, - max(count_value) as max_value, - count_big(*) as total - from rawData - group by stratum1_id -), -statsView (stratum1_id, count_value, total, rn) as -( - select stratum1_id, - count_value, - count_big(*) as total, - row_number() over (partition by stratum1_id order by count_value) as rn - FROM rawData - group by stratum1_id, count_value -), -priorStats (stratum1_id, count_value, total, accumulated) as -( - select s.stratum1_id, s.count_value, s.total, sum(p.total) as accumulated - from statsView s - join statsView p on s.stratum1_id = p.stratum1_id and p.rn <= s.rn - group by s.stratum1_id, s.count_value, s.total, s.rn -) -select 1510 as analysis_id, - CAST(p.stratum1_id AS VARCHAR(255)) as stratum_1, - o.total as count_value, - o.min_value, - o.max_value, - o.avg_value, - o.stdev_value, - MIN(case when p.accumulated >= .50 * o.total then count_value else o.max_value end) as median_value, - MIN(case when p.accumulated >= .10 * o.total then count_value else o.max_value end) as p10_value, - MIN(case when p.accumulated >= .25 * o.total then count_value else o.max_value end) as p25_value, - MIN(case when p.accumulated >= .75 * o.total then count_value else o.max_value end) as p75_value, - MIN(case when p.accumulated >= .90 * o.total then count_value else o.max_value end) as p90_value -into #tempResults -from priorStats p -join overallStats o on p.stratum1_id = o.stratum1_id -GROUP BY p.stratum1_id, o.total, o.min_value, o.max_value, o.avg_value, o.stdev_value -; - -insert into @results_database_schema.ACHILLES_results_dist (analysis_id, stratum_1, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value) -select analysis_id, stratum_1, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value -from #tempResults -; - -truncate table #tempResults; -drop table #tempResults; ---} - ---{1511 IN (@list_of_analysis_ids)}?{ --- 1511 Distribution of average wholesale price, by drug_concept_id -with rawData(stratum1_id, count_value) as -( - select subject_id as stratum1_id, - average_wholesale_price as count_value - from @results_database_schema.ACHILLES_drug_cost_raw - where average_wholesale_price is not null -), -overallStats (stratum1_id, avg_value, stdev_value, min_value, max_value, total) as -( - select stratum1_id, - CAST(avg(1.0 * count_value) AS FLOAT) as avg_value, - CAST(stdev(count_value) AS FLOAT) as stdev_value, - min(count_value) as min_value, - max(count_value) as max_value, - count_big(*) as total - from rawData - group by stratum1_id -), -statsView (stratum1_id, count_value, total, rn) as -( - select stratum1_id, - count_value, - count_big(*) as total, - row_number() over (partition by stratum1_id order by count_value) as rn - FROM rawData - group by stratum1_id, count_value -), -priorStats (stratum1_id, count_value, total, accumulated) as -( - select s.stratum1_id, s.count_value, s.total, sum(p.total) as accumulated - from statsView s - join statsView p on s.stratum1_id 
= p.stratum1_id and p.rn <= s.rn - group by s.stratum1_id, s.count_value, s.total, s.rn -) -select 1511 as analysis_id, - CAST(p.stratum1_id AS VARCHAR(255)) as stratum_1, - o.total as count_value, - o.min_value, - o.max_value, - o.avg_value, - o.stdev_value, - MIN(case when p.accumulated >= .50 * o.total then count_value else o.max_value end) as median_value, - MIN(case when p.accumulated >= .10 * o.total then count_value else o.max_value end) as p10_value, - MIN(case when p.accumulated >= .25 * o.total then count_value else o.max_value end) as p25_value, - MIN(case when p.accumulated >= .75 * o.total then count_value else o.max_value end) as p75_value, - MIN(case when p.accumulated >= .90 * o.total then count_value else o.max_value end) as p90_value -into #tempResults -from priorStats p -join overallStats o on p.stratum1_id = o.stratum1_id -GROUP BY p.stratum1_id, o.total, o.min_value, o.max_value, o.avg_value, o.stdev_value -; - -insert into @results_database_schema.ACHILLES_results_dist (analysis_id, stratum_1, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value) -select analysis_id, stratum_1, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value -from #tempResults -; - -truncate table #tempResults; -drop table #tempResults; - - ---} - -{1502 in (@list_of_analysis_ids) | 1503 in (@list_of_analysis_ids) | 1504 in (@list_of_analysis_ids) | 1505 in (@list_of_analysis_ids) | 1506 in (@list_of_analysis_ids) | 1507 in (@list_of_analysis_ids) | 1508 in (@list_of_analysis_ids) | 1509 in (@list_of_analysis_ids) | 1510 in (@list_of_analysis_ids) | 1511 in (@list_of_analysis_ids)}?{ --- clean up cached table -DROP TABLE @results_database_schema.ACHILLES_drug_cost_raw; -} - -/******************************************** - -ACHILLES Analyses on PROCEDURE_COST table - -*********************************************/ - -{(1602 in (@list_of_analysis_ids) | 1603 in (@list_of_analysis_ids) | 1604 in (@list_of_analysis_ids) | 1605 in (@list_of_analysis_ids) | 1606 in (@list_of_analysis_ids) | 1607 in (@list_of_analysis_ids) | 1608 in (@list_of_analysis_ids))}?{ - -IF OBJECT_ID('@results_database_schema.ACHILLES_procedure_cost_raw', 'U') IS NOT NULL - DROP TABLE @results_database_schema.ACHILLES_procedure_cost_raw; - -select procedure_concept_id as subject_id, - paid_copay, - paid_coinsurance, - paid_toward_deductible, - paid_by_payer, - paid_by_coordination_benefits, - total_out_of_pocket, - total_paid -INTO @results_database_schema.ACHILLES_procedure_cost_raw -from @cdm_database_schema.procedure_cost pc1 -join @cdm_database_schema.procedure_occurrence po1 on pc1.procedure_occurrence_id = po1.procedure_occurrence_id and procedure_concept_id <> 0 -; -} - - ---{1600 IN (@list_of_analysis_ids)}?{ --- 1600 Number of procedure cost records with invalid procedure exposure id -insert into @results_database_schema.ACHILLES_results (analysis_id, count_value) -select 1600 as analysis_id, - COUNT_BIG(pc1.procedure_cost_ID) as count_value -from - @cdm_database_schema.procedure_cost pc1 - left join @cdm_database_schema.procedure_occurrence po1 - on pc1.procedure_occurrence_id = po1.procedure_occurrence_id -where po1.procedure_occurrence_id is null -; ---} - ---{1601 IN (@list_of_analysis_ids)}?{ --- 1601 Number of procedure cost records with invalid payer plan period id -insert into @results_database_schema.ACHILLES_results (analysis_id, count_value) -select 1601 as analysis_id, - 
COUNT_BIG(pc1.procedure_cost_ID) as count_value -from - @cdm_database_schema.procedure_cost pc1 - left join @cdm_database_schema.payer_plan_period ppp1 - on pc1.payer_plan_period_id = ppp1.payer_plan_period_id -where pc1.payer_plan_period_id is not null - and ppp1.payer_plan_period_id is null -; ---} - - ---{1602 IN (@list_of_analysis_ids)}?{ --- 1602 Distribution of paid copay, by procedure_concept_id -with rawData(stratum1_id, count_value) as -( - select subject_id as stratum1_id, - paid_copay as count_value - from @results_database_schema.ACHILLES_procedure_cost_raw - where paid_copay is not null -), -overallStats (stratum1_id, avg_value, stdev_value, min_value, max_value, total) as -( - select stratum1_id, - CAST(avg(1.0 * count_value) AS FLOAT) as avg_value, - CAST(stdev(count_value) AS FLOAT) as stdev_value, - min(count_value) as min_value, - max(count_value) as max_value, - count_big(*) as total - from rawData - group by stratum1_id -), -statsView (stratum1_id, count_value, total, rn) as -( - select stratum1_id, - count_value, - count_big(*) as total, - row_number() over (partition by stratum1_id order by count_value) as rn - FROM rawData - group by stratum1_id, count_value -), -priorStats (stratum1_id, count_value, total, accumulated) as -( - select s.stratum1_id, s.count_value, s.total, sum(p.total) as accumulated - from statsView s - join statsView p on s.stratum1_id = p.stratum1_id and p.rn <= s.rn - group by s.stratum1_id, s.count_value, s.total, s.rn -) -select 1602 as analysis_id, - CAST(p.stratum1_id AS VARCHAR(255)) as stratum_1, - o.total as count_value, - o.min_value, - o.max_value, - o.avg_value, - o.stdev_value, - MIN(case when p.accumulated >= .50 * o.total then count_value else o.max_value end) as median_value, - MIN(case when p.accumulated >= .10 * o.total then count_value else o.max_value end) as p10_value, - MIN(case when p.accumulated >= .25 * o.total then count_value else o.max_value end) as p25_value, - MIN(case when p.accumulated >= .75 * o.total then count_value else o.max_value end) as p75_value, - MIN(case when p.accumulated >= .90 * o.total then count_value else o.max_value end) as p90_value -into #tempResults -from priorStats p -join overallStats o on p.stratum1_id = o.stratum1_id -GROUP BY p.stratum1_id, o.total, o.min_value, o.max_value, o.avg_value, o.stdev_value -; - -insert into @results_database_schema.ACHILLES_results_dist (analysis_id, stratum_1, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value) -select analysis_id, stratum_1, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value -from #tempResults -; - -truncate table #tempResults; -drop table #tempResults; - ---} - - ---{1603 IN (@list_of_analysis_ids)}?{ --- 1603 Distribution of paid coinsurance, by procedure_concept_id -with rawData(stratum1_id, count_value) as -( - select subject_id as stratum1_id, - paid_coinsurance as count_value - from @results_database_schema.ACHILLES_procedure_cost_raw - where paid_coinsurance is not null -), -overallStats (stratum1_id, avg_value, stdev_value, min_value, max_value, total) as -( - select stratum1_id, -CAST(avg(1.0 * count_value) AS FLOAT) as avg_value, - CAST(stdev(count_value) AS FLOAT) as stdev_value, - min(count_value) as min_value, - max(count_value) as max_value, - count_big(*) as total - from rawData - group by stratum1_id -), -statsView (stratum1_id, count_value, total, rn) as -( - select stratum1_id, - count_value, - 
count_big(*) as total, - row_number() over (partition by stratum1_id order by count_value) as rn - FROM rawData - group by stratum1_id, count_value -), -priorStats (stratum1_id, count_value, total, accumulated) as -( - select s.stratum1_id, s.count_value, s.total, sum(p.total) as accumulated - from statsView s - join statsView p on s.stratum1_id = p.stratum1_id and p.rn <= s.rn - group by s.stratum1_id, s.count_value, s.total, s.rn -) -select 1603 as analysis_id, - CAST(p.stratum1_id AS VARCHAR(255)) as stratum_1, - o.total as count_value, - o.min_value, - o.max_value, - o.avg_value, - o.stdev_value, - MIN(case when p.accumulated >= .50 * o.total then count_value else o.max_value end) as median_value, - MIN(case when p.accumulated >= .10 * o.total then count_value else o.max_value end) as p10_value, - MIN(case when p.accumulated >= .25 * o.total then count_value else o.max_value end) as p25_value, - MIN(case when p.accumulated >= .75 * o.total then count_value else o.max_value end) as p75_value, - MIN(case when p.accumulated >= .90 * o.total then count_value else o.max_value end) as p90_value -into #tempResults -from priorStats p -join overallStats o on p.stratum1_id = o.stratum1_id -GROUP BY p.stratum1_id, o.total, o.min_value, o.max_value, o.avg_value, o.stdev_value -; - -insert into @results_database_schema.ACHILLES_results_dist (analysis_id, stratum_1, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value) -select analysis_id, stratum_1, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value -from #tempResults -; - -truncate table #tempResults; -drop table #tempResults; - ---} - ---{1604 IN (@list_of_analysis_ids)}?{ --- 1604 Distribution of paid toward deductible, by procedure_concept_id -with rawData(stratum1_id, count_value) as -( - select subject_id as stratum1_id, - paid_toward_deductible as count_value - from @results_database_schema.ACHILLES_procedure_cost_raw - where paid_toward_deductible is not null -), -overallStats (stratum1_id, avg_value, stdev_value, min_value, max_value, total) as -( - select stratum1_id, - CAST(avg(1.0 * count_value) AS FLOAT) as avg_value, - CAST(stdev(count_value) AS FLOAT) as stdev_value, - min(count_value) as min_value, - max(count_value) as max_value, - count_big(*) as total - from rawData - group by stratum1_id -), -statsView (stratum1_id, count_value, total, rn) as -( - select stratum1_id, - count_value, - count_big(*) as total, - row_number() over (partition by stratum1_id order by count_value) as rn - FROM rawData - group by stratum1_id, count_value -), -priorStats (stratum1_id, count_value, total, accumulated) as -( - select s.stratum1_id, s.count_value, s.total, sum(p.total) as accumulated - from statsView s - join statsView p on s.stratum1_id = p.stratum1_id and p.rn <= s.rn - group by s.stratum1_id, s.count_value, s.total, s.rn -) -select 1604 as analysis_id, - CAST(p.stratum1_id AS VARCHAR(255)) as stratum_1, - o.total as count_value, - o.min_value, - o.max_value, - o.avg_value, - o.stdev_value, - MIN(case when p.accumulated >= .50 * o.total then count_value else o.max_value end) as median_value, - MIN(case when p.accumulated >= .10 * o.total then count_value else o.max_value end) as p10_value, - MIN(case when p.accumulated >= .25 * o.total then count_value else o.max_value end) as p25_value, - MIN(case when p.accumulated >= .75 * o.total then count_value else o.max_value end) as p75_value, - MIN(case when 
p.accumulated >= .90 * o.total then count_value else o.max_value end) as p90_value -into #tempResults -from priorStats p -join overallStats o on p.stratum1_id = o.stratum1_id -GROUP BY p.stratum1_id, o.total, o.min_value, o.max_value, o.avg_value, o.stdev_value -; - -insert into @results_database_schema.ACHILLES_results_dist (analysis_id, stratum_1, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value) -select analysis_id, stratum_1, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value -from #tempResults -; - -truncate table #tempResults; -drop table #tempResults; - ---} - ---{1605 IN (@list_of_analysis_ids)}?{ --- 1605 Distribution of paid by payer, by procedure_concept_id -with rawData(stratum1_id, count_value) as -( - select subject_id as stratum1_id, - paid_by_payer as count_value - from @results_database_schema.ACHILLES_procedure_cost_raw - where paid_by_payer is not null -), -overallStats (stratum1_id, avg_value, stdev_value, min_value, max_value, total) as -( - select stratum1_id, - CAST(avg(1.0 * count_value) AS FLOAT) as avg_value, - CAST(stdev(count_value) AS FLOAT) as stdev_value, - min(count_value) as min_value, - max(count_value) as max_value, - count_big(*) as total - from rawData - group by stratum1_id -), -statsView (stratum1_id, count_value, total, rn) as -( - select stratum1_id, - count_value, - count_big(*) as total, - row_number() over (partition by stratum1_id order by count_value) as rn - FROM rawData - group by stratum1_id, count_value -), -priorStats (stratum1_id, count_value, total, accumulated) as -( - select s.stratum1_id, s.count_value, s.total, sum(p.total) as accumulated - from statsView s - join statsView p on s.stratum1_id = p.stratum1_id and p.rn <= s.rn - group by s.stratum1_id, s.count_value, s.total, s.rn -) -select 1605 as analysis_id, - CAST(p.stratum1_id AS VARCHAR(255)) as stratum_1, - o.total as count_value, - o.min_value, - o.max_value, - o.avg_value, - o.stdev_value, - MIN(case when p.accumulated >= .50 * o.total then count_value else o.max_value end) as median_value, - MIN(case when p.accumulated >= .10 * o.total then count_value else o.max_value end) as p10_value, - MIN(case when p.accumulated >= .25 * o.total then count_value else o.max_value end) as p25_value, - MIN(case when p.accumulated >= .75 * o.total then count_value else o.max_value end) as p75_value, - MIN(case when p.accumulated >= .90 * o.total then count_value else o.max_value end) as p90_value -into #tempResults -from priorStats p -join overallStats o on p.stratum1_id = o.stratum1_id -GROUP BY p.stratum1_id, o.total, o.min_value, o.max_value, o.avg_value, o.stdev_value -; - -insert into @results_database_schema.ACHILLES_results_dist (analysis_id, stratum_1, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value) -select analysis_id, stratum_1, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value -from #tempResults -; - -truncate table #tempResults; -drop table #tempResults; - ---} - ---{1606 IN (@list_of_analysis_ids)}?{ --- 1606 Distribution of paid by coordination of benefit, by procedure_concept_id -with rawData(stratum1_id, count_value) as -( - select subject_id as stratum1_id, - paid_by_coordination_benefits as count_value - from @results_database_schema.ACHILLES_procedure_cost_raw - where paid_by_coordination_benefits is not null 
-), -overallStats (stratum1_id, avg_value, stdev_value, min_value, max_value, total) as -( - select stratum1_id, - CAST(avg(1.0 * count_value) AS FLOAT) as avg_value, - CAST(stdev(count_value) AS FLOAT) as stdev_value, - min(count_value) as min_value, - max(count_value) as max_value, - count_big(*) as total - from rawData - group by stratum1_id -), -statsView (stratum1_id, count_value, total, rn) as -( - select stratum1_id, - count_value, - count_big(*) as total, - row_number() over (partition by stratum1_id order by count_value) as rn - FROM rawData - group by stratum1_id, count_value -), -priorStats (stratum1_id, count_value, total, accumulated) as -( - select s.stratum1_id, s.count_value, s.total, sum(p.total) as accumulated - from statsView s - join statsView p on s.stratum1_id = p.stratum1_id and p.rn <= s.rn - group by s.stratum1_id, s.count_value, s.total, s.rn -) -select 1606 as analysis_id, - CAST(p.stratum1_id AS VARCHAR(255)) as stratum_1, - o.total as count_value, - o.min_value, - o.max_value, - o.avg_value, - o.stdev_value, - MIN(case when p.accumulated >= .50 * o.total then count_value else o.max_value end) as median_value, - MIN(case when p.accumulated >= .10 * o.total then count_value else o.max_value end) as p10_value, - MIN(case when p.accumulated >= .25 * o.total then count_value else o.max_value end) as p25_value, - MIN(case when p.accumulated >= .75 * o.total then count_value else o.max_value end) as p75_value, - MIN(case when p.accumulated >= .90 * o.total then count_value else o.max_value end) as p90_value -into #tempResults -from priorStats p -join overallStats o on p.stratum1_id = o.stratum1_id -GROUP BY p.stratum1_id, o.total, o.min_value, o.max_value, o.avg_value, o.stdev_value -; - -insert into @results_database_schema.ACHILLES_results_dist (analysis_id, stratum_1, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value) -select analysis_id, stratum_1, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value -from #tempResults -; - -truncate table #tempResults; -drop table #tempResults; - ---} - ---{1607 IN (@list_of_analysis_ids)}?{ --- 1607 Distribution of total out-of-pocket, by procedure_concept_id -with rawData(stratum1_id, count_value) as -( - select subject_id as stratum1_id, - total_out_of_pocket as count_value - from @results_database_schema.ACHILLES_procedure_cost_raw - where total_out_of_pocket is not null -), -overallStats (stratum1_id, avg_value, stdev_value, min_value, max_value, total) as -( - select stratum1_id, - CAST(avg(1.0 * count_value) AS FLOAT) as avg_value, - CAST(stdev(count_value) AS FLOAT) as stdev_value, - min(count_value) as min_value, - max(count_value) as max_value, - count_big(*) as total - from rawData - group by stratum1_id -), -statsView (stratum1_id, count_value, total, rn) as -( - select stratum1_id, - count_value, - count_big(*) as total, - row_number() over (partition by stratum1_id order by count_value) as rn - FROM rawData - group by stratum1_id, count_value -), -priorStats (stratum1_id, count_value, total, accumulated) as -( - select s.stratum1_id, s.count_value, s.total, sum(p.total) as accumulated - from statsView s - join statsView p on s.stratum1_id = p.stratum1_id and p.rn <= s.rn - group by s.stratum1_id, s.count_value, s.total, s.rn -) -select 1607 as analysis_id, - CAST(p.stratum1_id AS VARCHAR(255)) as stratum_1, - o.total as count_value, - o.min_value, - o.max_value, - o.avg_value, - 
o.stdev_value, - MIN(case when p.accumulated >= .50 * o.total then count_value else o.max_value end) as median_value, - MIN(case when p.accumulated >= .10 * o.total then count_value else o.max_value end) as p10_value, - MIN(case when p.accumulated >= .25 * o.total then count_value else o.max_value end) as p25_value, - MIN(case when p.accumulated >= .75 * o.total then count_value else o.max_value end) as p75_value, - MIN(case when p.accumulated >= .90 * o.total then count_value else o.max_value end) as p90_value -into #tempResults -from priorStats p -join overallStats o on p.stratum1_id = o.stratum1_id -GROUP BY p.stratum1_id, o.total, o.min_value, o.max_value, o.avg_value, o.stdev_value -; - -insert into @results_database_schema.ACHILLES_results_dist (analysis_id, stratum_1, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value) -select analysis_id, stratum_1, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value -from #tempResults -; - -truncate table #tempResults; -drop table #tempResults; - ---} - - ---{1608 IN (@list_of_analysis_ids)}?{ --- 1608 Distribution of total paid, by procedure_concept_id -with rawData(stratum1_id, count_value) as -( - select subject_id as stratum1_id, - total_paid as count_value - from @results_database_schema.ACHILLES_procedure_cost_raw - where total_paid is not null -), -overallStats (stratum1_id, avg_value, stdev_value, min_value, max_value, total) as -( - select stratum1_id, - CAST(avg(1.0 * count_value) AS FLOAT) as avg_value, - CAST(stdev(count_value) AS FLOAT) as stdev_value, - min(count_value) as min_value, - max(count_value) as max_value, - count_big(*) as total - from rawData - group by stratum1_id -), -statsView (stratum1_id, count_value, total, rn) as -( - select stratum1_id, - count_value, - count_big(*) as total, - row_number() over (partition by stratum1_id order by count_value) as rn - FROM rawData - group by stratum1_id, count_value -), -priorStats (stratum1_id, count_value, total, accumulated) as -( - select s.stratum1_id, s.count_value, s.total, sum(p.total) as accumulated - from statsView s - join statsView p on s.stratum1_id = p.stratum1_id and p.rn <= s.rn - group by s.stratum1_id, s.count_value, s.total, s.rn -) -select 1608 as analysis_id, - CAST(p.stratum1_id AS VARCHAR(255)) as stratum_1, - o.total as count_value, - o.min_value, - o.max_value, - o.avg_value, - o.stdev_value, - MIN(case when p.accumulated >= .50 * o.total then count_value else o.max_value end) as median_value, - MIN(case when p.accumulated >= .10 * o.total then count_value else o.max_value end) as p10_value, - MIN(case when p.accumulated >= .25 * o.total then count_value else o.max_value end) as p25_value, - MIN(case when p.accumulated >= .75 * o.total then count_value else o.max_value end) as p75_value, - MIN(case when p.accumulated >= .90 * o.total then count_value else o.max_value end) as p90_value -into #tempResults -from priorStats p -join overallStats o on p.stratum1_id = o.stratum1_id -GROUP BY p.stratum1_id, o.total, o.min_value, o.max_value, o.avg_value, o.stdev_value -; - -insert into @results_database_schema.ACHILLES_results_dist (analysis_id, stratum_1, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value) -select analysis_id, stratum_1, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value -from #tempResults -; 
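-- [Editor's note] The 16xx cost analyses above, and the per-analysis files added later in this
-- diff, all derive medians and percentiles with the same cumulative-frequency pattern rather
-- than a dialect-specific PERCENTILE_CONT: statsView counts how often each distinct value
-- occurs, priorStats self-joins on rn to accumulate those counts, and MIN(CASE ...) then picks
-- the first value whose running total reaches 10/25/50/75/90 percent of the overall row count.
-- The @-prefixed tokens (@results_database_schema, @cdm_database_schema, @list_of_analysis_ids)
-- and the --{...}?{...} wrappers are SqlRender parameter and conditional markers, resolved in R
-- before the SQL is translated to the target dialect.
-- The sketch below is an editorial illustration of the percentile pattern on a throwaway temp
-- table; the #demo_values table and its column are illustrative only and not part of Achilles.

create table #demo_values (v int);
insert into #demo_values (v) values (1), (2), (2), (3), (10);

with statsView (v, total, rn) as
(
  -- one row per distinct value, with its frequency and its rank in ascending order
  select v, count_big(*) as total, row_number() over (order by v) as rn
  from #demo_values
  group by v
),
priorStats (v, accumulated) as
(
  -- running total of frequencies up to and including each distinct value
  select s.v, sum(p.total) as accumulated
  from statsView s
  join statsView p on p.rn <= s.rn
  group by s.v, s.rn
),
overallStats (total, max_value) as
(
  select count_big(*) as total, max(v) as max_value
  from #demo_values
)
select
  -- first value whose cumulative count reaches the target fraction of all rows
  MIN(case when p.accumulated >= .50 * o.total then p.v else o.max_value end) as median_value,
  MIN(case when p.accumulated >= .90 * o.total then p.v else o.max_value end) as p90_value
from priorStats p
cross join overallStats o;
-- for the five demo rows this returns median_value = 2 and p90_value = 10

drop table #demo_values;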
- -truncate table #tempResults; -drop table #tempResults; ---} - - ---{1609 IN (@list_of_analysis_ids)}?{ --- 1609 Number of records by disease_class_concept_id - ---not applicable for OMOP CDMv5 - ---} - - ---{1610 IN (@list_of_analysis_ids)}?{ --- 1610 Number of records by revenue_code_concept_id -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, count_value) -select 1610 as analysis_id, - CAST(revenue_code_concept_id AS VARCHAR(255)) as stratum_1, - COUNT_BIG(pc1.procedure_cost_ID) as count_value -from - @cdm_database_schema.procedure_cost pc1 -where revenue_code_concept_id is not null -group by revenue_code_concept_id -; ---} - -{(1602 in (@list_of_analysis_ids) | 1603 in (@list_of_analysis_ids) | 1604 in (@list_of_analysis_ids) | 1605 in (@list_of_analysis_ids) | 1606 in (@list_of_analysis_ids) | 1607 in (@list_of_analysis_ids) | 1608 in (@list_of_analysis_ids))}?{ --- clean up cached table -DROP TABLE @results_database_schema.ACHILLES_procedure_cost_raw; -} - --- End Cost Analysis -} - -/******************************************** - -ACHILLES Analyses on COHORT table - -*********************************************/ - - ---{1700 IN (@list_of_analysis_ids)}?{ --- 1700 Number of records by cohort_concept_id -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, count_value) -select 1700 as analysis_id, - CAST(cohort_definition_id AS VARCHAR(255)) as stratum_1, - COUNT_BIG(subject_ID) as count_value -from - @cdm_database_schema.cohort c1 -group by cohort_definition_id -; ---} - - ---{1701 IN (@list_of_analysis_ids)}?{ --- 1701 Number of records with cohort end date < cohort start date -insert into @results_database_schema.ACHILLES_results (analysis_id, count_value) -select 1701 as analysis_id, - COUNT_BIG(subject_ID) as count_value -from - @cdm_database_schema.cohort c1 -where c1.cohort_end_date < c1.cohort_start_date -; ---} - -/******************************************** - -ACHILLES Analyses on MEASUREMENT table - -*********************************************/ - - - ---{1800 IN (@list_of_analysis_ids)}?{ --- 1800 Number of persons with at least one measurement occurrence, by measurement_concept_id -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, count_value) -select 1800 as analysis_id, - CAST(m.measurement_CONCEPT_ID AS VARCHAR(255)) as stratum_1, - COUNT_BIG(distinct m.PERSON_ID) as count_value -from - @cdm_database_schema.measurement m -group by m.measurement_CONCEPT_ID -; ---} - - ---{1801 IN (@list_of_analysis_ids)}?{ --- 1801 Number of measurement occurrence records, by measurement_concept_id -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, count_value) -select 1801 as analysis_id, - CAST(m.measurement_concept_id AS VARCHAR(255)) as stratum_1, - COUNT_BIG(m.PERSON_ID) as count_value -from - @cdm_database_schema.measurement m -group by m.measurement_CONCEPT_ID -; ---} - - - ---{1802 IN (@list_of_analysis_ids)}?{ --- 1802 Number of persons by measurement occurrence start month, by measurement_concept_id -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, stratum_2, count_value) -select 1802 as analysis_id, - CAST(m.measurement_concept_id AS VARCHAR(255)) as stratum_1, - CAST(YEAR(measurement_date)*100 + month(measurement_date) AS VARCHAR(255)) as stratum_2, - COUNT_BIG(distinct PERSON_ID) as count_value -from - @cdm_database_schema.measurement m -group by m.measurement_concept_id, - YEAR(measurement_date)*100 + month(measurement_date) 
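-- [Editor's note] YEAR(...)*100 + MONTH(...) packs a calendar month into a single integer
-- (e.g. May 2018 becomes 201805); this is the encoding used for the monthly stratum values
-- throughout these analyses.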
-; ---} - - - ---{1803 IN (@list_of_analysis_ids)}?{ --- 1803 Number of distinct measurement occurrence concepts per person -with rawData(count_value) as -( - select num_measurements as count_value - from - ( - select m.person_id, COUNT_BIG(distinct m.measurement_concept_id) as num_measurements - from - @cdm_database_schema.measurement m - group by m.person_id - ) t0 -), -overallStats (avg_value, stdev_value, min_value, max_value, total) as -( - select CAST(avg(1.0 * count_value) AS FLOAT) as avg_value, - CAST(stdev(count_value) AS FLOAT) as stdev_value, - min(count_value) as min_value, - max(count_value) as max_value, - count_big(*) as total - from rawData -), -statsView (count_value, total, rn) as -( - select count_value, - count_big(*) as total, - row_number() over (order by count_value) as rn - FROM rawData - group by count_value -), -priorStats (count_value, total, accumulated) as -( - select s.count_value, s.total, sum(p.total) as accumulated - from statsView s - join statsView p on p.rn <= s.rn - group by s.count_value, s.total, s.rn -) -select 1803 as analysis_id, - o.total as count_value, - o.min_value, - o.max_value, - o.avg_value, - o.stdev_value, - MIN(case when p.accumulated >= .50 * o.total then count_value else o.max_value end) as median_value, - MIN(case when p.accumulated >= .10 * o.total then count_value else o.max_value end) as p10_value, - MIN(case when p.accumulated >= .25 * o.total then count_value else o.max_value end) as p25_value, - MIN(case when p.accumulated >= .75 * o.total then count_value else o.max_value end) as p75_value, - MIN(case when p.accumulated >= .90 * o.total then count_value else o.max_value end) as p90_value -into #tempResults -from priorStats p -CROSS JOIN overallStats o -GROUP BY o.total, o.min_value, o.max_value, o.avg_value, o.stdev_value -; - -insert into @results_database_schema.ACHILLES_results_dist (analysis_id, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value) -select analysis_id, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value -from #tempResults -; - -truncate table #tempResults; - -drop table #tempResults; - - ---} - - - ---{1804 IN (@list_of_analysis_ids)}?{ --- 1804 Number of persons with at least one measurement occurrence, by measurement_concept_id by calendar year by gender by age decile -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, stratum_2, stratum_3, stratum_4, count_value) -select 1804 as analysis_id, - CAST(m.measurement_concept_id AS VARCHAR(255)) as stratum_1, - CAST(YEAR(measurement_date) AS VARCHAR(255)) as stratum_2, - CAST(p1.gender_concept_id AS VARCHAR(255)) as stratum_3, - CAST(floor((year(measurement_date) - p1.year_of_birth)/10) AS VARCHAR(255)) as stratum_4, - COUNT_BIG(distinct p1.PERSON_ID) as count_value -from @cdm_database_schema.PERSON p1 -inner join @cdm_database_schema.measurement m on p1.person_id = m.person_id -group by m.measurement_concept_id, - YEAR(measurement_date), - p1.gender_concept_id, - floor((year(measurement_date) - p1.year_of_birth)/10) -; ---} - ---{1805 IN (@list_of_analysis_ids)}?{ --- 1805 Number of measurement records, by measurement_concept_id by measurement_type_concept_id -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, stratum_2, count_value) -select 1805 as analysis_id, - CAST(m.measurement_concept_id AS VARCHAR(255)) as stratum_1, - CAST(m.measurement_type_concept_id AS VARCHAR(255)) 
as stratum_2, - COUNT_BIG(m.PERSON_ID) as count_value -from @cdm_database_schema.measurement m -group by m.measurement_concept_id, - m.measurement_type_concept_id -; ---} - - - ---{1806 IN (@list_of_analysis_ids)}?{ --- 1806 Distribution of age by measurement_concept_id -select o1.measurement_concept_id as subject_id, - p1.gender_concept_id, - o1.measurement_start_year - p1.year_of_birth as count_value -INTO #rawData_1806 -from @cdm_database_schema.PERSON p1 -inner join -( - select person_id, measurement_concept_id, min(year(measurement_date)) as measurement_start_year - from @cdm_database_schema.measurement - group by person_id, measurement_concept_id -) o1 -on p1.person_id = o1.person_id -; - -with overallStats (stratum1_id, stratum2_id, avg_value, stdev_value, min_value, max_value, total) as -( - select subject_id as stratum1_id, - gender_concept_id as stratum2_id, - CAST(avg(1.0 * count_value) AS FLOAT) as avg_value, - CAST(stdev(count_value) AS FLOAT) as stdev_value, - min(count_value) as min_value, - max(count_value) as max_value, - count_big(*) as total - FROM #rawData_1806 - group by subject_id, gender_concept_id -), -statsView (stratum1_id, stratum2_id, count_value, total, rn) as -( - select subject_id as stratum1_id, gender_concept_id as stratum2_id, count_value, count_big(*) as total, row_number() over (partition by subject_id, gender_concept_id order by count_value) as rn - FROM #rawData_1806 - group by subject_id, gender_concept_id, count_value -), -priorStats (stratum1_id, stratum2_id, count_value, total, accumulated) as -( - select s.stratum1_id, s.stratum2_id, s.count_value, s.total, sum(p.total) as accumulated - from statsView s - join statsView p on s.stratum1_id = p.stratum1_id and s.stratum2_id = p.stratum2_id and p.rn <= s.rn - group by s.stratum1_id, s.stratum2_id, s.count_value, s.total, s.rn -) -select 1806 as analysis_id, - CAST(o.stratum1_id AS VARCHAR(255)) AS stratum1_id, - CAST(o.stratum2_id AS VARCHAR(255)) AS stratum2_id, - o.total as count_value, - o.min_value, - o.max_value, - o.avg_value, - o.stdev_value, - MIN(case when p.accumulated >= .50 * o.total then count_value else o.max_value end) as median_value, - MIN(case when p.accumulated >= .10 * o.total then count_value else o.max_value end) as p10_value, - MIN(case when p.accumulated >= .25 * o.total then count_value else o.max_value end) as p25_value, - MIN(case when p.accumulated >= .75 * o.total then count_value else o.max_value end) as p75_value, - MIN(case when p.accumulated >= .90 * o.total then count_value else o.max_value end) as p90_value -into #tempResults -from priorStats p -join overallStats o on p.stratum1_id = o.stratum1_id and p.stratum2_id = o.stratum2_id -GROUP BY o.stratum1_id, o.stratum2_id, o.total, o.min_value, o.max_value, o.avg_value, o.stdev_value -; - -insert into @results_database_schema.ACHILLES_results_dist (analysis_id, stratum_1, stratum_2, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value) -select analysis_id, stratum1_id, stratum2_id, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value -from #tempResults -; - -truncate table #rawData_1806; -drop table #rawData_1806; - -truncate table #tempResults; -drop table #tempResults; - - ---} - ---{1807 IN (@list_of_analysis_ids)}?{ --- 1807 Number of measurement occurrence records, by measurement_concept_id and unit_concept_id -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, 
stratum_2, count_value) -select 1807 as analysis_id, - CAST(m.measurement_concept_id AS VARCHAR(255)) as stratum_1, - CAST(m.unit_concept_id AS VARCHAR(255)) as stratum_2, - COUNT_BIG(m.PERSON_ID) as count_value -from @cdm_database_schema.measurement m -group by m.measurement_concept_id, m.unit_concept_id -; ---} - - - ---{1809 IN (@list_of_analysis_ids)}?{ --- 1809 Number of measurement records with invalid person_id -insert into @results_database_schema.ACHILLES_results (analysis_id, count_value) -select 1809 as analysis_id, - COUNT_BIG(m.PERSON_ID) as count_value -from @cdm_database_schema.measurement m - left join @cdm_database_schema.PERSON p1 on p1.person_id = m.person_id -where p1.person_id is null -; ---} - - ---{1810 IN (@list_of_analysis_ids)}?{ --- 1810 Number of measurement records outside valid observation period -insert into @results_database_schema.ACHILLES_results (analysis_id, count_value) -select 1810 as analysis_id, - COUNT_BIG(m.PERSON_ID) as count_value -from @cdm_database_schema.measurement m - left join @cdm_database_schema.observation_period op on op.person_id = m.person_id - and m.measurement_date >= op.observation_period_start_date - and m.measurement_date <= op.observation_period_end_date -where op.person_id is null -; ---} - - - ---{1812 IN (@list_of_analysis_ids)}?{ --- 1812 Number of measurement records with invalid provider_id -insert into @results_database_schema.ACHILLES_results (analysis_id, count_value) -select 1812 as analysis_id, - COUNT_BIG(m.PERSON_ID) as count_value -from @cdm_database_schema.measurement m - left join @cdm_database_schema.provider p on p.provider_id = m.provider_id -where m.provider_id is not null - and p.provider_id is null -; ---} - ---{1813 IN (@list_of_analysis_ids)}?{ --- 1813 Number of observation records with invalid visit_id -insert into @results_database_schema.ACHILLES_results (analysis_id, count_value) -select 1813 as analysis_id, COUNT_BIG(m.PERSON_ID) as count_value -from @cdm_database_schema.measurement m - left join @cdm_database_schema.visit_occurrence vo on m.visit_occurrence_id = vo.visit_occurrence_id -where m.visit_occurrence_id is not null - and vo.visit_occurrence_id is null -; ---} - - ---{1814 IN (@list_of_analysis_ids)}?{ --- 1814 Number of measurement records with no value (numeric or concept) -insert into @results_database_schema.ACHILLES_results (analysis_id, count_value) -select 1814 as analysis_id, - COUNT_BIG(m.PERSON_ID) as count_value -from - @cdm_database_schema.measurement m -where m.value_as_number is null - and m.value_as_concept_id is null -; ---} - - ---{1815 IN (@list_of_analysis_ids)}?{ --- 1815 Distribution of numeric values, by measurement_concept_id and unit_concept_id -select measurement_concept_id as subject_id, - unit_concept_id, - CAST(value_as_number AS FLOAT) as count_value -INTO #rawData_1815 -from @cdm_database_schema.measurement m -where m.unit_concept_id is not null - and m.value_as_number is not null -; - -with overallStats (stratum1_id, stratum2_id, avg_value, stdev_value, min_value, max_value, total) as -( - select subject_id as stratum1_id, - unit_concept_id as stratum2_id, - CAST(avg(1.0 * count_value) AS FLOAT) as avg_value, - CAST(stdev(count_value) AS FLOAT) as stdev_value, - min(count_value) as min_value, - max(count_value) as max_value, - count_big(*) as total - FROM #rawData_1815 - group by subject_id, unit_concept_id -), -statsView (stratum1_id, stratum2_id, count_value, total, rn) as -( - select subject_id as stratum1_id, unit_concept_id as stratum2_id, count_value, 
count_big(*) as total, row_number() over (partition by subject_id, unit_concept_id order by count_value) as rn - FROM #rawData_1815 - group by subject_id, unit_concept_id, count_value -), -priorStats (stratum1_id, stratum2_id, count_value, total, accumulated) as -( - select s.stratum1_id, s.stratum2_id, s.count_value, s.total, sum(p.total) as accumulated - from statsView s - join statsView p on s.stratum1_id = p.stratum1_id and s.stratum2_id = p.stratum2_id and p.rn <= s.rn - group by s.stratum1_id, s.stratum2_id, s.count_value, s.total, s.rn -) -select 1815 as analysis_id, - CAST(o.stratum1_id AS VARCHAR(255)) AS stratum1_id, - CAST(o.stratum2_id AS VARCHAR(255)) AS stratum2_id, - o.total as count_value, - o.min_value, - o.max_value, - o.avg_value, - o.stdev_value, - MIN(case when p.accumulated >= .50 * o.total then count_value else o.max_value end) as median_value, - MIN(case when p.accumulated >= .10 * o.total then count_value else o.max_value end) as p10_value, - MIN(case when p.accumulated >= .25 * o.total then count_value else o.max_value end) as p25_value, - MIN(case when p.accumulated >= .75 * o.total then count_value else o.max_value end) as p75_value, - MIN(case when p.accumulated >= .90 * o.total then count_value else o.max_value end) as p90_value -into #tempResults -from priorStats p -join overallStats o on p.stratum1_id = o.stratum1_id and p.stratum2_id = o.stratum2_id -GROUP BY o.stratum1_id, o.stratum2_id, o.total, o.min_value, o.max_value, o.avg_value, o.stdev_value -; - -insert into @results_database_schema.ACHILLES_results_dist (analysis_id, stratum_1, stratum_2, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value) -select analysis_id, stratum1_id, stratum2_id, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value -from #tempResults -; - -truncate table #rawData_1815; -drop table #rawData_1815; - -truncate table #tempResults; -drop table #tempResults; - ---} - - ---{1816 IN (@list_of_analysis_ids)}?{ --- 1816 Distribution of low range, by measurement_concept_id and unit_concept_id -select measurement_concept_id as subject_id, - unit_concept_id, - CAST(range_low AS FLOAT) as count_value -INTO #rawData_1816 -from @cdm_database_schema.measurement m -where m.unit_concept_id is not null - and m.value_as_number is not null - and m.range_low is not null - and m.range_high is not null -; - -with overallStats (stratum1_id, stratum2_id, avg_value, stdev_value, min_value, max_value, total) as -( - select subject_id as stratum1_id, - unit_concept_id as stratum2_id, - CAST(avg(1.0 * count_value) AS FLOAT) as avg_value, - CAST(stdev(count_value) AS FLOAT) as stdev_value, - min(count_value) as min_value, - max(count_value) as max_value, - count_big(*) as total - FROM #rawData_1816 - group by subject_id, unit_concept_id -), -statsView (stratum1_id, stratum2_id, count_value, total, rn) as -( - select subject_id as stratum1_id, unit_concept_id as stratum2_id, count_value, count_big(*) as total, row_number() over (partition by subject_id, unit_concept_id order by count_value) as rn - FROM #rawData_1816 - group by subject_id, unit_concept_id, count_value -), -priorStats (stratum1_id, stratum2_id, count_value, total, accumulated) as -( - select s.stratum1_id, s.stratum2_id, s.count_value, s.total, sum(p.total) as accumulated - from statsView s - join statsView p on s.stratum1_id = p.stratum1_id and s.stratum2_id = p.stratum2_id and p.rn <= s.rn - group by 
s.stratum1_id, s.stratum2_id, s.count_value, s.total, s.rn -) -select 1816 as analysis_id, - CAST(o.stratum1_id AS VARCHAR(255)) AS stratum1_id, - CAST(o.stratum2_id AS VARCHAR(255)) AS stratum2_id, - o.total as count_value, - o.min_value, - o.max_value, - o.avg_value, - o.stdev_value, - MIN(case when p.accumulated >= .50 * o.total then count_value else o.max_value end) as median_value, - MIN(case when p.accumulated >= .10 * o.total then count_value else o.max_value end) as p10_value, - MIN(case when p.accumulated >= .25 * o.total then count_value else o.max_value end) as p25_value, - MIN(case when p.accumulated >= .75 * o.total then count_value else o.max_value end) as p75_value, - MIN(case when p.accumulated >= .90 * o.total then count_value else o.max_value end) as p90_value -into #tempResults -from priorStats p -join overallStats o on p.stratum1_id = o.stratum1_id and p.stratum2_id = o.stratum2_id -GROUP BY o.stratum1_id, o.stratum2_id, o.total, o.min_value, o.max_value, o.avg_value, o.stdev_value -; - -insert into @results_database_schema.ACHILLES_results_dist (analysis_id, stratum_1, stratum_2, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value) -select analysis_id, stratum1_id, stratum2_id, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value -from #tempResults -; - -truncate table #rawData_1816; -drop table #rawData_1816; - -truncate table #tempResults; -drop table #tempResults; - ---} - - ---{1817 IN (@list_of_analysis_ids)}?{ --- 1817 Distribution of high range, by observation_concept_id and unit_concept_id -select measurement_concept_id as subject_id, - unit_concept_id, - CAST(range_high AS FLOAT) as count_value -INTO #rawData_1817 -from @cdm_database_schema.measurement m -where m.unit_concept_id is not null - and m.value_as_number is not null - and m.range_low is not null - and m.range_high is not null -; - -with overallStats (stratum1_id, stratum2_id, avg_value, stdev_value, min_value, max_value, total) as -( - select subject_id as stratum1_id, - unit_concept_id as stratum2_id, - CAST(avg(1.0 * count_value) AS FLOAT) as avg_value, - CAST(stdev(count_value) AS FLOAT) as stdev_value, - min(count_value) as min_value, - max(count_value) as max_value, - count_big(*) as total - FROM #rawData_1817 - group by subject_id, unit_concept_id -), -statsView (stratum1_id, stratum2_id, count_value, total, rn) as -( - select subject_id as stratum1_id, unit_concept_id as stratum2_id, count_value, count_big(*) as total, row_number() over (partition by subject_id, unit_concept_id order by count_value) as rn - FROM #rawData_1817 - group by subject_id, unit_concept_id, count_value -), -priorStats (stratum1_id, stratum2_id, count_value, total, accumulated) as -( - select s.stratum1_id, s.stratum2_id, s.count_value, s.total, sum(p.total) as accumulated - from statsView s - join statsView p on s.stratum1_id = p.stratum1_id and s.stratum2_id = p.stratum2_id and p.rn <= s.rn - group by s.stratum1_id, s.stratum2_id, s.count_value, s.total, s.rn -) -select 1817 as analysis_id, - CAST(o.stratum1_id AS VARCHAR(255)) AS stratum1_id, - CAST(o.stratum2_id AS VARCHAR(255)) AS stratum2_id, - o.total as count_value, - o.min_value, - o.max_value, - o.avg_value, - o.stdev_value, - MIN(case when p.accumulated >= .50 * o.total then count_value else o.max_value end) as median_value, - MIN(case when p.accumulated >= .10 * o.total then count_value else o.max_value end) as p10_value, - MIN(case 
when p.accumulated >= .25 * o.total then count_value else o.max_value end) as p25_value, - MIN(case when p.accumulated >= .75 * o.total then count_value else o.max_value end) as p75_value, - MIN(case when p.accumulated >= .90 * o.total then count_value else o.max_value end) as p90_value -into #tempResults -from priorStats p -join overallStats o on p.stratum1_id = o.stratum1_id and p.stratum2_id = o.stratum2_id -GROUP BY o.stratum1_id, o.stratum2_id, o.total, o.min_value, o.max_value, o.avg_value, o.stdev_value -; - -insert into @results_database_schema.ACHILLES_results_dist (analysis_id, stratum_1, stratum_2, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value) -select analysis_id, stratum1_id, stratum2_id, count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value -from #tempResults -; - -truncate table #rawData_1817; -drop table #rawData_1817; - -truncate table #tempResults; -drop table #tempResults; - ---} - - - ---{1818 IN (@list_of_analysis_ids)}?{ --- 1818 Number of observation records below/within/above normal range, by observation_concept_id and unit_concept_id -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, stratum_2, stratum_3, count_value) -select 1818 as analysis_id, - CAST(m.measurement_concept_id AS VARCHAR(255)) as stratum_1, - CAST(m.unit_concept_id AS VARCHAR(255)) as stratum_2, - CAST(case when m.value_as_number < m.range_low then 'Below Range Low' - when m.value_as_number >= m.range_low and m.value_as_number <= m.range_high then 'Within Range' - when m.value_as_number > m.range_high then 'Above Range High' - else 'Other' end AS VARCHAR(255)) as stratum_3, - COUNT_BIG(m.PERSON_ID) as count_value -from @cdm_database_schema.measurement m -where m.value_as_number is not null - and m.unit_concept_id is not null - and m.range_low is not null - and m.range_high is not null -group by measurement_concept_id, - unit_concept_id, - CAST(case when m.value_as_number < m.range_low then 'Below Range Low' - when m.value_as_number >= m.range_low and m.value_as_number <= m.range_high then 'Within Range' - when m.value_as_number > m.range_high then 'Above Range High' - else 'Other' end AS VARCHAR(255)) -; ---} - - - - ---{1820 IN (@list_of_analysis_ids)}?{ --- 1820 Number of observation records by condition occurrence start month -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, count_value) -select 1820 as analysis_id, - CAST(YEAR(measurement_date)*100 + month(measurement_date) AS VARCHAR(255)) as stratum_1, - COUNT_BIG(PERSON_ID) as count_value -from @cdm_database_schema.measurement m -group by YEAR(measurement_date)*100 + month(measurement_date) -; ---} - ---{1821 IN (@list_of_analysis_ids)}?{ --- 1821 Number of measurement records with no numeric value -insert into @results_database_schema.ACHILLES_results (analysis_id, count_value) -select 1821 as analysis_id, - COUNT_BIG(m.PERSON_ID) as count_value -from - @cdm_database_schema.measurement m -where m.value_as_number is null -; ---} - - ---{1891 IN (@list_of_analysis_ids)}?{ --- 1891 Number of total persons that have at least x measurements -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, stratum_2, count_value) -select - 1891 as analysis_id, - CAST(measurement_concept_id AS VARCHAR(255)) as stratum_1, - CAST(meas_cnt AS VARCHAR(255)) as stratum_2, - sum(count(person_id)) over (partition by measurement_concept_id order by 
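-- [Editor's note] ordering the window sum descending by meas_cnt makes the running total at
-- each bucket count every person whose per-concept measurement count is greater than or equal
-- to that bucket, which is what "at least x measurements" requires.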
meas_cnt desc) as count_value -from ( - select - m.measurement_concept_id, - count(m.measurement_id) as meas_cnt, - m.person_id - from @cdm_database_schema.measurement m - group by m.person_id, m.measurement_concept_id -) cnt_q -group by cnt_q.measurement_concept_id, cnt_q.meas_cnt; ---} ---end of measurement analyses - -/******************************************** - -Reports - -*********************************************/ - - ---{1900 IN (@list_of_analysis_ids)}?{ --- 1900 concept_0 report - -INSERT INTO @results_database_schema.ACHILLES_results (analysis_id, stratum_1, stratum_2, count_value) -select 1900 as analysis_id, CAST(table_name AS VARCHAR(255)) as stratum_1, CAST(source_value AS VARCHAR(255)) as stratum_2, cnt as count_value - from ( -select 'measurement' as table_name,measurement_source_value as source_value, COUNT_BIG(*) as cnt from @cdm_database_schema.measurement where measurement_concept_id = 0 group by measurement_source_value -union -select 'procedure_occurrence' as table_name,procedure_source_value as source_value, COUNT_BIG(*) as cnt from @cdm_database_schema.procedure_occurrence where procedure_concept_id = 0 group by procedure_source_value -union -select 'drug_exposure' as table_name,drug_source_value as source_value, COUNT_BIG(*) as cnt from @cdm_database_schema.drug_exposure where drug_concept_id = 0 group by drug_source_value -union -select 'condition_occurrence' as table_name,condition_source_value as source_value, COUNT_BIG(*) as cnt from @cdm_database_schema.condition_occurrence where condition_concept_id = 0 group by condition_source_value -) a -where cnt >= 1 --use other threshold if needed (e.g., 10) ---order by a.table_name desc, cnt desc -; ---} - - -/******************************************** - -ACHILLES Iris Analyses - -*********************************************/ ---starting at id 2000 - ---{2000 IN (@list_of_analysis_ids)}?{ --- 2000 patients with at least 1 Dx and 1 Rx -insert into @results_database_schema.ACHILLES_results (analysis_id, count_value) -select 2000 as analysis_id, ---gender_concept_id as stratum_1, COUNT_BIG(distinct person_id) as count_value - CAST(a.cnt AS BIGINT) AS count_value - FROM ( - select COUNT_BIG(*) cnt from ( - select distinct person_id from @cdm_database_schema.condition_occurrence - intersect - select distinct person_id from @cdm_database_schema.drug_exposure - ) b - ) a - ; ---} - - - ---{2001 IN (@list_of_analysis_ids)}?{ --- 2001 patients with at least 1 Dx and 1 Proc -insert into @results_database_schema.ACHILLES_results (analysis_id, count_value) -select 2001 as analysis_id, ---gender_concept_id as stratum_1, COUNT_BIG(distinct person_id) as count_value - CAST(a.cnt AS BIGINT) AS count_value - FROM ( - select COUNT_BIG(*) cnt from ( - select distinct person_id from @cdm_database_schema.condition_occurrence - intersect - select distinct person_id from @cdm_database_schema.procedure_occurrence - ) b - ) a - ; ---} - - - ---{2002 IN (@list_of_analysis_ids)}?{ --- 2002 patients with at least 1 Mes and 1 Dx and 1 Rx -insert into @results_database_schema.ACHILLES_results (analysis_id, count_value) -select 2002 as analysis_id, ---gender_concept_id as stratum_1, COUNT_BIG(distinct person_id) as count_value - CAST(a.cnt AS BIGINT) AS count_value - FROM ( - select COUNT_BIG(*) cnt from ( - select distinct person_id from @cdm_database_schema.measurement - intersect - select distinct person_id from @cdm_database_schema.condition_occurrence - intersect - select distinct person_id from @cdm_database_schema.drug_exposure - ) b 
- ) a - ; ---} - - ---{2003 IN (@list_of_analysis_ids)}?{ --- 2003 Patients with at least one visit --- this analysis is in fact redundant, since it is possible to get it via --- dist analysis 203 and query select count_value from achilles_results_dist where analysis_id = 203; -insert into @results_database_schema.ACHILLES_results (analysis_id, count_value) -select 2003 as analysis_id, COUNT_BIG(distinct person_id) as count_value -from @cdm_database_schema.visit_occurrence; ---} - - -/******************************************** - -ACHILLES Analyses on DEVICE_EXPOSURE table - -*********************************************/ - - - ---{2100 IN (@list_of_analysis_ids)}?{ --- 2100 Number of persons with at least one device exposure , by device_concept_id -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, count_value) -select 2100 as analysis_id, - CAST(m.device_CONCEPT_ID AS VARCHAR(255)) as stratum_1, - COUNT_BIG(distinct m.PERSON_ID) as count_value -from - @cdm_database_schema.device_exposure m -group by m.device_CONCEPT_ID -; ---} - - ---{2101 IN (@list_of_analysis_ids)}?{ --- 2101 Number of device exposure records, by device_concept_id -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, count_value) -select 2101 as analysis_id, - CAST(m.device_CONCEPT_ID AS VARCHAR(255)) as stratum_1, - COUNT_BIG(m.PERSON_ID) as count_value -from - @cdm_database_schema.device_exposure m -group by m.device_CONCEPT_ID -; ---} - - - ---{2102 IN (@list_of_analysis_ids)}?{ --- 2102 Number of persons by device by start month, by device_concept_id -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, stratum_2, count_value) -select 2102 as analysis_id, - CAST(m.device_CONCEPT_ID AS VARCHAR(255)) as stratum_1, - CAST(YEAR(device_exposure_start_date)*100 + month(device_exposure_start_date) AS VARCHAR(255)) as stratum_2, - COUNT_BIG(distinct PERSON_ID) as count_value -from - @cdm_database_schema.device_exposure m -group by m.device_CONCEPT_ID, - YEAR(device_exposure_start_date)*100 + month(device_exposure_start_date) -; ---} - ---2103 is not implemented at this point - - ---{2104 IN (@list_of_analysis_ids)}?{ --- 2104 Number of persons with at least one device occurrence, by device_concept_id by calendar year by gender by age decile -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, stratum_2, stratum_3, stratum_4, count_value) -select 2104 as analysis_id, - CAST(m.device_CONCEPT_ID AS VARCHAR(255)) as stratum_1, - CAST(YEAR(device_exposure_start_date) AS VARCHAR(255)) as stratum_2, - CAST(p1.gender_concept_id AS VARCHAR(255)) as stratum_3, - CAST(floor((year(device_exposure_start_date) - p1.year_of_birth)/10) AS VARCHAR(255)) as stratum_4, - COUNT_BIG(distinct p1.PERSON_ID) as count_value -from @cdm_database_schema.PERSON p1 -inner join @cdm_database_schema.device_exposure m on p1.person_id = m.person_id -group by m.device_CONCEPT_ID, - YEAR(device_exposure_start_date), - p1.gender_concept_id, - floor((year(device_exposure_start_date) - p1.year_of_birth)/10) -; ---} - - ---{2105 IN (@list_of_analysis_ids)}?{ --- 2105 Number of exposure records by device_concept_id by device_type_concept_id -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, stratum_2, count_value) -select 2105 as analysis_id, - CAST(m.device_CONCEPT_ID AS VARCHAR(255)) as stratum_1, - CAST(m.device_type_concept_id AS VARCHAR(255)) as stratum_2, - COUNT_BIG(m.PERSON_ID) as count_value -from 
@cdm_database_schema.device_exposure m -group by m.device_CONCEPT_ID, - m.device_type_concept_id -; ---} - ---2106 and more analyses are not implemented at this point - - - - - -/******************************************** - -ACHILLES Analyses on NOTE table - -*********************************************/ - - - ---{2200 IN (@list_of_analysis_ids)}?{ --- 2200 Number of persons with at least one device exposure , by device_concept_id -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, count_value) -select 2200 as analysis_id, - CAST(m.note_type_CONCEPT_ID AS VARCHAR(255)) as stratum_1, - COUNT_BIG(distinct m.PERSON_ID) as count_value -from - @cdm_database_schema.note m -group by m.note_type_CONCEPT_ID -; ---} - - ---{2201 IN (@list_of_analysis_ids)}?{ --- 2201 Number of device exposure records, by device_concept_id -insert into @results_database_schema.ACHILLES_results (analysis_id, stratum_1, count_value) -select 2201 as analysis_id, - CAST(m.note_type_CONCEPT_ID AS VARCHAR(255)) as stratum_1, - COUNT_BIG(m.PERSON_ID) as count_value -from - @cdm_database_schema.note m -group by m.note_type_CONCEPT_ID -; ---} - - - - - ---final processing of results -delete from @results_database_schema.ACHILLES_results -where count_value <= @smallcellcount; -delete from @results_database_schema.ACHILLES_results_dist -where count_value <= @smallcellcount; diff --git a/inst/sql/sql_server/analyses/0.sql b/inst/sql/sql_server/analyses/0.sql new file mode 100755 index 00000000..fec50be2 --- /dev/null +++ b/inst/sql/sql_server/analyses/0.sql @@ -0,0 +1,25 @@ +-- 0 cdm name, version of Achilles and date when pre-computations were executed + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select 0 as analysis_id, CAST('@source_name' AS VARCHAR(255)) as stratum_1, CAST('@achilles_version' AS VARCHAR(255)) as stratum_2, +CAST(GETDATE() AS VARCHAR(255)) as stratum_3, +null as stratum_4, null as stratum_5, +COUNT_BIG(distinct person_id) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_0 +from @cdmDatabaseSchema.PERSON; + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select 0 as analysis_id, CAST('@source_name' AS VARCHAR(255)) as stratum_1, +null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, +COUNT_BIG(distinct person_id) as count_value, + null as min_value, + null as max_value, + null as avg_value, + null as stdev_value, + null as median_value, + null as p10_value, + null as p25_value, + null as p75_value, + null as p90_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_dist_0 +from @cdmDatabaseSchema.PERSON; diff --git a/inst/sql/sql_server/analyses/1.sql b/inst/sql/sql_server/analyses/1.sql new file mode 100755 index 00000000..d830dfa4 --- /dev/null +++ b/inst/sql/sql_server/analyses/1.sql @@ -0,0 +1,7 @@ +-- 1 Number of persons + +select 1 as analysis_id, +null as stratum_1, null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, +COUNT_BIG(distinct person_id) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_1 +from @cdmDatabaseSchema.PERSON; diff --git a/inst/sql/sql_server/analyses/10.sql b/inst/sql/sql_server/analyses/10.sql new file mode 100755 index 00000000..952ad56c --- /dev/null +++ b/inst/sql/sql_server/analyses/10.sql @@ -0,0 +1,10 @@ +-- 10 Number of all persons by year of birth and by gender + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select 10 as analysis_id, CAST(year_of_birth AS VARCHAR(255)) as stratum_1, + CAST(gender_concept_id AS VARCHAR(255)) as stratum_2, + null as stratum_3, 
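-- [Editor's note] In the new per-analysis files each query selects INTO a
-- @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_<analysis_id> (or ..._dist_<analysis_id>)
-- scratch table and always emits the full stratum_1 .. stratum_5 layout, padding unused strata
-- with NULL, presumably so the scratch tables share one shape and can be unioned into the final
-- ACHILLES_results / ACHILLES_results_dist tables by the R code elsewhere in this change.
-- The --HINT DISTRIBUTE_ON_KEY(...) comments are SqlRender hints so that, on MPP platforms,
-- the translated statement can declare the named column as the table's distribution key.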
null as stratum_4, null as stratum_5, + COUNT_BIG(distinct person_id) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_10 +from @cdmDatabaseSchema.PERSON +group by YEAR_OF_BIRTH, gender_concept_id; diff --git a/inst/sql/sql_server/analyses/1000.sql b/inst/sql/sql_server/analyses/1000.sql new file mode 100755 index 00000000..1ef3342e --- /dev/null +++ b/inst/sql/sql_server/analyses/1000.sql @@ -0,0 +1,12 @@ +-- 1000 Number of persons with at least one condition occurrence, by condition_concept_id + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select 1000 as analysis_id, + CAST(ce1.condition_CONCEPT_ID AS VARCHAR(255)) as stratum_1, + null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(distinct ce1.PERSON_ID) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_1000 +from + @cdmDatabaseSchema.condition_era ce1 +group by ce1.condition_CONCEPT_ID +; diff --git a/inst/sql/sql_server/analyses/1001.sql b/inst/sql/sql_server/analyses/1001.sql new file mode 100755 index 00000000..dbbca64f --- /dev/null +++ b/inst/sql/sql_server/analyses/1001.sql @@ -0,0 +1,12 @@ +-- 1001 Number of condition occurrence records, by condition_concept_id + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select 1001 as analysis_id, + CAST(ce1.condition_CONCEPT_ID AS VARCHAR(255)) as stratum_1, + null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(ce1.PERSON_ID) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_1001 +from + @cdmDatabaseSchema.condition_era ce1 +group by ce1.condition_CONCEPT_ID +; diff --git a/inst/sql/sql_server/analyses/1002.sql b/inst/sql/sql_server/analyses/1002.sql new file mode 100755 index 00000000..edc41e33 --- /dev/null +++ b/inst/sql/sql_server/analyses/1002.sql @@ -0,0 +1,14 @@ +-- 1002 Number of persons by condition occurrence start month, by condition_concept_id + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select 1002 as analysis_id, + CAST(ce1.condition_concept_id AS VARCHAR(255)) as stratum_1, + CAST(YEAR(condition_era_start_date)*100 + month(condition_era_start_date) AS VARCHAR(255)) as stratum_2, + null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(distinct PERSON_ID) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_1002 +from +@cdmDatabaseSchema.condition_era ce1 +group by ce1.condition_concept_id, + YEAR(condition_era_start_date)*100 + month(condition_era_start_date) +; diff --git a/inst/sql/sql_server/analyses/1003.sql b/inst/sql/sql_server/analyses/1003.sql new file mode 100755 index 00000000..1c8e6050 --- /dev/null +++ b/inst/sql/sql_server/analyses/1003.sql @@ -0,0 +1,60 @@ +-- 1003 Number of distinct condition era concepts per person + +--HINT DISTRIBUTE_ON_KEY(count_value) +with rawData(count_value) as +( + select COUNT_BIG(distinct ce1.condition_concept_id) as count_value + from @cdmDatabaseSchema.condition_era ce1 + group by ce1.person_id +), +overallStats (avg_value, stdev_value, min_value, max_value, total) as +( + select CAST(avg(1.0 * count_value) AS FLOAT) as avg_value, + CAST(stdev(count_value) AS FLOAT) as stdev_value, + min(count_value) as min_value, + max(count_value) as max_value, + count_big(*) as total + from rawData +), +statsView (count_value, total, rn) as +( + select count_value, + count_big(*) as total, + row_number() over (order by count_value) as rn + FROM rawData + group by count_value +), +priorStats (count_value, total, accumulated) as +( + select s.count_value, s.total, sum(p.total) as 
accumulated + from statsView s + join statsView p on p.rn <= s.rn + group by s.count_value, s.total, s.rn +) +select 1003 as analysis_id, + o.total as count_value, + o.min_value, + o.max_value, + o.avg_value, + o.stdev_value, + MIN(case when p.accumulated >= .50 * o.total then count_value else o.max_value end) as median_value, + MIN(case when p.accumulated >= .10 * o.total then count_value else o.max_value end) as p10_value, + MIN(case when p.accumulated >= .25 * o.total then count_value else o.max_value end) as p25_value, + MIN(case when p.accumulated >= .75 * o.total then count_value else o.max_value end) as p75_value, + MIN(case when p.accumulated >= .90 * o.total then count_value else o.max_value end) as p90_value +into #tempResults +from priorStats p +CROSS JOIN overallStats o +GROUP BY o.total, o.min_value, o.max_value, o.avg_value, o.stdev_value +; + +--HINT DISTRIBUTE_ON_KEY(count_value) +select analysis_id, +null as stratum_1, null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, +count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_dist_1003 +from #tempResults +; + +truncate table #tempResults; +drop table #tempResults; diff --git a/inst/sql/sql_server/analyses/1004.sql b/inst/sql/sql_server/analyses/1004.sql new file mode 100755 index 00000000..62fbd7d7 --- /dev/null +++ b/inst/sql/sql_server/analyses/1004.sql @@ -0,0 +1,20 @@ +-- 1004 Number of persons with at least one condition occurrence, by condition_concept_id by calendar year by gender by age decile + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select 1004 as analysis_id, + CAST(ce1.condition_concept_id AS VARCHAR(255)) as stratum_1, + CAST(YEAR(condition_era_start_date) AS VARCHAR(255)) as stratum_2, + CAST(p1.gender_concept_id AS VARCHAR(255)) as stratum_3, + CAST(floor((year(condition_era_start_date) - p1.year_of_birth)/10) AS VARCHAR(255)) as stratum_4, + null as stratum_5, + COUNT_BIG(distinct p1.PERSON_ID) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_1004 +from @cdmDatabaseSchema.PERSON p1 +inner join +@cdmDatabaseSchema.condition_era ce1 +on p1.person_id = ce1.person_id +group by ce1.condition_concept_id, + YEAR(condition_era_start_date), + p1.gender_concept_id, + floor((year(condition_era_start_date) - p1.year_of_birth)/10) +; diff --git a/inst/sql/sql_server/analyses/1006.sql b/inst/sql/sql_server/analyses/1006.sql new file mode 100755 index 00000000..c793c3c8 --- /dev/null +++ b/inst/sql/sql_server/analyses/1006.sql @@ -0,0 +1,74 @@ +-- 1006 Distribution of age by condition_concept_id + +--HINT DISTRIBUTE_ON_KEY(subject_id) +select ce.condition_concept_id as subject_id, + p1.gender_concept_id, + ce.condition_start_year - p1.year_of_birth as count_value +INTO #rawData_1006 +from @cdmDatabaseSchema.PERSON p1 +inner join +( + select person_id, condition_concept_id, min(year(condition_era_start_date)) as condition_start_year + from @cdmDatabaseSchema.condition_era + group by person_id, condition_concept_id +) ce on p1.person_id = ce.person_id +; + +--HINT DISTRIBUTE_ON_KEY(stratum1_id) +with overallStats (stratum1_id, stratum2_id, avg_value, stdev_value, min_value, max_value, total) as +( + select subject_id as stratum1_id, + gender_concept_id as stratum2_id, + CAST(avg(1.0 * count_value) AS FLOAT) as avg_value, + CAST(stdev(count_value) AS FLOAT) as stdev_value, + min(count_value) as min_value, + max(count_value) as max_value, + count_big(*) as 
total + FROM #rawData_1006 + group by subject_id, gender_concept_id +), +statsView (stratum1_id, stratum2_id, count_value, total, rn) as +( + select subject_id as stratum1_id, gender_concept_id as stratum2_id, count_value, count_big(*) as total, row_number() over (partition by subject_id, gender_concept_id order by count_value) as rn + FROM #rawData_1006 + group by subject_id, gender_concept_id, count_value +), +priorStats (stratum1_id, stratum2_id, count_value, total, accumulated) as +( + select s.stratum1_id, s.stratum2_id, s.count_value, s.total, sum(p.total) as accumulated + from statsView s + join statsView p on s.stratum1_id = p.stratum1_id and s.stratum2_id = p.stratum2_id and p.rn <= s.rn + group by s.stratum1_id, s.stratum2_id, s.count_value, s.total, s.rn +) +select 1006 as analysis_id, + CAST(o.stratum1_id AS VARCHAR(255)) AS stratum1_id, + CAST(o.stratum2_id AS VARCHAR(255)) AS stratum2_id, + o.total as count_value, + o.min_value, + o.max_value, + o.avg_value, + o.stdev_value, + MIN(case when p.accumulated >= .50 * o.total then count_value else o.max_value end) as median_value, + MIN(case when p.accumulated >= .10 * o.total then count_value else o.max_value end) as p10_value, + MIN(case when p.accumulated >= .25 * o.total then count_value else o.max_value end) as p25_value, + MIN(case when p.accumulated >= .75 * o.total then count_value else o.max_value end) as p75_value, + MIN(case when p.accumulated >= .90 * o.total then count_value else o.max_value end) as p90_value +into #tempResults +from priorStats p +join overallStats o on p.stratum1_id = o.stratum1_id and p.stratum2_id = o.stratum2_id +GROUP BY o.stratum1_id, o.stratum2_id, o.total, o.min_value, o.max_value, o.avg_value, o.stdev_value +; + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select analysis_id, stratum1_id as stratum_1, stratum2_id as stratum_2, +null as stratum_3, null as stratum_4, null as stratum_5, +count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_dist_1006 +from #tempResults +; + +truncate table #rawData_1006; +drop table #rawData_1006; + +truncate table #tempResults; +drop table #tempResults; diff --git a/inst/sql/sql_server/analyses/1007.sql b/inst/sql/sql_server/analyses/1007.sql new file mode 100755 index 00000000..2777f194 --- /dev/null +++ b/inst/sql/sql_server/analyses/1007.sql @@ -0,0 +1,64 @@ +-- 1007 Distribution of condition era length, by condition_concept_id + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +with rawData(stratum1_id, count_value) as +( + select condition_concept_id as stratum1_id, + datediff(dd,condition_era_start_date, condition_era_end_date) as count_value + from @cdmDatabaseSchema.condition_era ce1 +), +overallStats (stratum1_id, avg_value, stdev_value, min_value, max_value, total) as +( + select stratum1_id, + CAST(avg(1.0 * count_value) AS FLOAT) as avg_value, + CAST(stdev(count_value) AS FLOAT) as stdev_value, + min(count_value) as min_value, + max(count_value) as max_value, + count_big(*) as total + from rawData + group by stratum1_id +), +statsView (stratum1_id, count_value, total, rn) as +( + select stratum1_id, + count_value, + count_big(*) as total, + row_number() over (partition by stratum1_id order by count_value) as rn + FROM rawData + group by stratum1_id, count_value +), +priorStats (stratum1_id, count_value, total, accumulated) as +( + select s.stratum1_id, s.count_value, s.total, sum(p.total) as accumulated + from statsView s + join statsView p on 
s.stratum1_id = p.stratum1_id and p.rn <= s.rn + group by s.stratum1_id, s.count_value, s.total, s.rn +) +select 1007 as analysis_id, + CAST(p.stratum1_id AS VARCHAR(255)) as stratum_1, + o.total as count_value, + o.min_value, + o.max_value, + o.avg_value, + o.stdev_value, + MIN(case when p.accumulated >= .50 * o.total then count_value else o.max_value end) as median_value, + MIN(case when p.accumulated >= .10 * o.total then count_value else o.max_value end) as p10_value, + MIN(case when p.accumulated >= .25 * o.total then count_value else o.max_value end) as p25_value, + MIN(case when p.accumulated >= .75 * o.total then count_value else o.max_value end) as p75_value, + MIN(case when p.accumulated >= .90 * o.total then count_value else o.max_value end) as p90_value +into #tempResults +from priorStats p +join overallStats o on p.stratum1_id = o.stratum1_id +GROUP BY p.stratum1_id, o.total, o.min_value, o.max_value, o.avg_value, o.stdev_value +; + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select analysis_id, stratum_1, +null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, +count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_dist_1007 +from #tempResults +; + +truncate table #tempResults; +drop table #tempResults; diff --git a/inst/sql/sql_server/analyses/1008.sql b/inst/sql/sql_server/analyses/1008.sql new file mode 100755 index 00000000..79a243d5 --- /dev/null +++ b/inst/sql/sql_server/analyses/1008.sql @@ -0,0 +1,13 @@ +-- 1008 Number of condition eras with invalid person + + +select 1008 as analysis_id, + null as stratum_1, null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(ce1.PERSON_ID) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_1008 +from + @cdmDatabaseSchema.condition_era ce1 + left join @cdmDatabaseSchema.PERSON p1 + on p1.person_id = ce1.person_id +where p1.person_id is null +; diff --git a/inst/sql/sql_server/analyses/1009.sql b/inst/sql/sql_server/analyses/1009.sql new file mode 100755 index 00000000..da6614ee --- /dev/null +++ b/inst/sql/sql_server/analyses/1009.sql @@ -0,0 +1,15 @@ +-- 1009 Number of condition eras outside valid observation period + + +select 1009 as analysis_id, + null as stratum_1, null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(ce1.PERSON_ID) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_1009 +from + @cdmDatabaseSchema.condition_era ce1 + left join @cdmDatabaseSchema.observation_period op1 + on op1.person_id = ce1.person_id + and ce1.condition_era_start_date >= op1.observation_period_start_date + and ce1.condition_era_start_date <= op1.observation_period_end_date +where op1.person_id is null +; diff --git a/inst/sql/sql_server/analyses/101.sql b/inst/sql/sql_server/analyses/101.sql new file mode 100755 index 00000000..5b876cb0 --- /dev/null +++ b/inst/sql/sql_server/analyses/101.sql @@ -0,0 +1,11 @@ +-- 101 Number of persons by age, with age at first observation period + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select 101 as analysis_id, CAST(year(op1.index_date) - p1.YEAR_OF_BIRTH AS VARCHAR(255)) as stratum_1, +null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, +COUNT_BIG(p1.person_id) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_101 +from @cdmDatabaseSchema.PERSON p1 + inner join (select person_id, 
MIN(observation_period_start_date) as index_date from @cdmDatabaseSchema.OBSERVATION_PERIOD group by PERSON_ID) op1 + on p1.PERSON_ID = op1.PERSON_ID +group by year(op1.index_date) - p1.YEAR_OF_BIRTH; diff --git a/inst/sql/sql_server/analyses/1010.sql b/inst/sql/sql_server/analyses/1010.sql new file mode 100755 index 00000000..3ea15650 --- /dev/null +++ b/inst/sql/sql_server/analyses/1010.sql @@ -0,0 +1,11 @@ +-- 1010 Number of condition eras with end date < start date + + +select 1010 as analysis_id, + null as stratum_1, null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(ce1.PERSON_ID) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_1010 +from + @cdmDatabaseSchema.condition_era ce1 +where ce1.condition_era_end_date < ce1.condition_era_start_date +; diff --git a/inst/sql/sql_server/analyses/102.sql b/inst/sql/sql_server/analyses/102.sql new file mode 100755 index 00000000..d9ec1bae --- /dev/null +++ b/inst/sql/sql_server/analyses/102.sql @@ -0,0 +1,12 @@ +-- 102 Number of persons by gender by age, with age at first observation period + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select 102 as analysis_id, CAST(p1.gender_concept_id AS VARCHAR(255)) as stratum_1, +CAST(year(op1.index_date) - p1.YEAR_OF_BIRTH AS VARCHAR(255)) as stratum_2, +null as stratum_3, null as stratum_4, null as stratum_5, +COUNT_BIG(p1.person_id) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_102 +from @cdmDatabaseSchema.PERSON p1 + inner join (select person_id, MIN(observation_period_start_date) as index_date from @cdmDatabaseSchema.OBSERVATION_PERIOD group by PERSON_ID) op1 + on p1.PERSON_ID = op1.PERSON_ID +group by p1.gender_concept_id, year(op1.index_date) - p1.YEAR_OF_BIRTH; diff --git a/inst/sql/sql_server/analyses/1020.sql b/inst/sql/sql_server/analyses/1020.sql new file mode 100755 index 00000000..e5ac817d --- /dev/null +++ b/inst/sql/sql_server/analyses/1020.sql @@ -0,0 +1,12 @@ +-- 1020 Number of drug era records by drug era start month + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select 1020 as analysis_id, + CAST(YEAR(condition_era_start_date)*100 + month(condition_era_start_date) AS VARCHAR(255)) as stratum_1, + null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(PERSON_ID) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_1020 +from +@cdmDatabaseSchema.condition_era ce1 +group by YEAR(condition_era_start_date)*100 + month(condition_era_start_date) +; diff --git a/inst/sql/sql_server/analyses/103.sql b/inst/sql/sql_server/analyses/103.sql new file mode 100755 index 00000000..342b6955 --- /dev/null +++ b/inst/sql/sql_server/analyses/103.sql @@ -0,0 +1,57 @@ +-- 103 Distribution of age at first observation period + +--HINT DISTRIBUTE_ON_KEY(count_value) +with rawData (person_id, age_value) as +( +select p.person_id, + MIN(YEAR(observation_period_start_date)) - P.YEAR_OF_BIRTH as age_value + from @cdmDatabaseSchema.PERSON p + JOIN @cdmDatabaseSchema.OBSERVATION_PERIOD op on p.person_id = op.person_id + group by p.person_id, p.year_of_birth +), +overallStats (avg_value, stdev_value, min_value, max_value, total) as +( + select CAST(avg(1.0 * age_value) AS FLOAT) as avg_value, + CAST(stdev(age_value) AS FLOAT) as stdev_value, + min(age_value) as min_value, + max(age_value) as max_value, + count_big(*) as total + FROM rawData +), +ageStats (age_value, total, rn) as +( + select age_value, count_big(*) as total, row_number() over (order by age_value) as rn + from rawData + 
group by age_value +), +ageStatsPrior (age_value, total, accumulated) as +( + select s.age_value, s.total, sum(p.total) as accumulated + from ageStats s + join ageStats p on p.rn <= s.rn + group by s.age_value, s.total, s.rn +), +tempResults as +( + select 103 as analysis_id, + o.total as count_value, + o.min_value, + o.max_value, + o.avg_value, + o.stdev_value, + MIN(case when p.accumulated >= .50 * o.total then age_value end) as median_value, + MIN(case when p.accumulated >= .10 * o.total then age_value end) as p10_value, + MIN(case when p.accumulated >= .25 * o.total then age_value end) as p25_value, + MIN(case when p.accumulated >= .75 * o.total then age_value end) as p75_value, + MIN(case when p.accumulated >= .90 * o.total then age_value end) as p90_value + --INTO #tempResults + from ageStatsPrior p + CROSS JOIN overallStats o + GROUP BY o.total, o.min_value, o.max_value, o.avg_value, o.stdev_value +) +select analysis_id, +null as stratum_1, null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, +count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_dist_103 +from tempResults +; diff --git a/inst/sql/sql_server/analyses/104.sql b/inst/sql/sql_server/analyses/104.sql new file mode 100755 index 00000000..c4f13951 --- /dev/null +++ b/inst/sql/sql_server/analyses/104.sql @@ -0,0 +1,62 @@ +-- 104 Distribution of age at first observation period by gender + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +with rawData (gender_concept_id, age_value) as +( + select p.gender_concept_id, MIN(YEAR(observation_period_start_date)) - P.YEAR_OF_BIRTH as age_value + from @cdmDatabaseSchema.PERSON p + JOIN @cdmDatabaseSchema.OBSERVATION_PERIOD op on p.person_id = op.person_id + group by p.person_id,p.gender_concept_id, p.year_of_birth +), +overallStats (gender_concept_id, avg_value, stdev_value, min_value, max_value, total) as +( + select gender_concept_id, + CAST(avg(1.0 * age_value) AS FLOAT) as avg_value, + CAST(stdev(age_value) AS FLOAT) as stdev_value, + min(age_value) as min_value, + max(age_value) as max_value, + count_big(*) as total + FROM rawData + group by gender_concept_id +), +ageStats (gender_concept_id, age_value, total, rn) as +( + select gender_concept_id, age_value, count_big(*) as total, row_number() over (order by age_value) as rn + FROM rawData + group by gender_concept_id, age_value +), +ageStatsPrior (gender_concept_id, age_value, total, accumulated) as +( + select s.gender_concept_id, s.age_value, s.total, sum(p.total) as accumulated + from ageStats s + join ageStats p on s.gender_concept_id = p.gender_concept_id and p.rn <= s.rn + group by s.gender_concept_id, s.age_value, s.total, s.rn +) +select 104 as analysis_id, + CAST(o.gender_concept_id AS VARCHAR(255)) as stratum_1, + o.total as count_value, + o.min_value, + o.max_value, + o.avg_value, + o.stdev_value, + MIN(case when p.accumulated >= .50 * o.total then age_value end) as median_value, + MIN(case when p.accumulated >= .10 * o.total then age_value end) as p10_value, + MIN(case when p.accumulated >= .25 * o.total then age_value end) as p25_value, + MIN(case when p.accumulated >= .75 * o.total then age_value end) as p75_value, + MIN(case when p.accumulated >= .90 * o.total then age_value end) as p90_value +INTO #tempResults +from ageStatsPrior p +join overallStats o on p.gender_concept_id = o.gender_concept_id +GROUP BY o.gender_concept_id, o.total, o.min_value, o.max_value, o.avg_value, 
o.stdev_value +; + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select analysis_id, stratum_1, +null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, +count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_dist_104 +from #tempResults +; + +truncate table #tempResults; +drop table #tempResults; diff --git a/inst/sql/sql_server/analyses/105.sql b/inst/sql/sql_server/analyses/105.sql new file mode 100755 index 00000000..cd873dfa --- /dev/null +++ b/inst/sql/sql_server/analyses/105.sql @@ -0,0 +1,68 @@ +-- 105 Length of observation (days) of first observation period + +--HINT DISTRIBUTE_ON_KEY(count_value) +select count_value, rn +into #tempObs +FROM +( + select DATEDIFF(dd,op.observation_period_start_date, op.observation_period_end_date) as count_value, + ROW_NUMBER() over (PARTITION by op.person_id order by op.observation_period_start_date asc) as rn + from @cdmDatabaseSchema.OBSERVATION_PERIOD op +) A +where rn = 1; + +select count_value, count_big(*) as total, row_number() over (order by count_value) as rn +into #statsView +FROM #tempObs +group by count_value; + +--HINT DISTRIBUTE_ON_KEY(count_value) +with overallStats (avg_value, stdev_value, min_value, max_value, total) as +( + select CAST(avg(1.0 * count_value) AS FLOAT) as avg_value, + CAST(stdev(count_value) AS FLOAT) as stdev_value, + min(count_value) as min_value, + max(count_value) as max_value, + count_big(*) as total + from #tempObs +), +priorStats (count_value, total, accumulated) as +( + select s.count_value, s.total, sum(p.total) as accumulated + from #statsView s + join #statsView p on p.rn <= s.rn + group by s.count_value, s.total, s.rn +) +select 105 as analysis_id, + o.total as count_value, + o.min_value, + o.max_value, + o.avg_value, + o.stdev_value, + MIN(case when p.accumulated >= .50 * o.total then count_value end) as median_value, + MIN(case when p.accumulated >= .10 * o.total then count_value end) as p10_value, + MIN(case when p.accumulated >= .25 * o.total then count_value end) as p25_value, + MIN(case when p.accumulated >= .75 * o.total then count_value end) as p75_value, + MIN(case when p.accumulated >= .90 * o.total then count_value end) as p90_value +into #tempResults +from priorStats p +CROSS JOIN overallStats o +GROUP BY o.total, o.min_value, o.max_value, o.avg_value, o.stdev_value +; + +--HINT DISTRIBUTE_ON_KEY(count_value) +select analysis_id, +null as stratum_1, null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, count_value, +min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_dist_105 +from #tempResults +; + +truncate table #tempObs; +drop table #tempObs; + +truncate table #statsView; +drop table #statsView; + +truncate table #tempResults; +drop table #tempResults; diff --git a/inst/sql/sql_server/analyses/106.sql b/inst/sql/sql_server/analyses/106.sql new file mode 100755 index 00000000..c6be336c --- /dev/null +++ b/inst/sql/sql_server/analyses/106.sql @@ -0,0 +1,71 @@ +-- 106 Length of observation (days) of first observation period by gender + +--HINT DISTRIBUTE_ON_KEY(gender_concept_id) +select p.gender_concept_id, op.count_value +into #rawData +FROM +( + select person_id, DATEDIFF(dd,op.observation_period_start_date, op.observation_period_end_date) as count_value, + ROW_NUMBER() over (PARTITION by op.person_id order by 
op.observation_period_start_date asc) as rn + from @cdmDatabaseSchema.OBSERVATION_PERIOD op +) op +JOIN @cdmDatabaseSchema.PERSON p on op.person_id = p.person_id +where op.rn = 1 +; + +--HINT DISTRIBUTE_ON_KEY(gender_concept_id) +with overallStats (gender_concept_id, avg_value, stdev_value, min_value, max_value, total) as +( + select gender_concept_id, + CAST(avg(1.0 * count_value) AS FLOAT) as avg_value, + CAST(stdev(count_value) AS FLOAT) as stdev_value, + min(count_value) as min_value, + max(count_value) as max_value, + count_big(*) as total + FROM #rawData + group by gender_concept_id +), +statsView (gender_concept_id, count_value, total, rn) as +( + select gender_concept_id, count_value, count_big(*) as total, row_number() over (order by count_value) as rn + FROM #rawData + group by gender_concept_id, count_value +), +priorStats (gender_concept_id,count_value, total, accumulated) as +( + select s.gender_concept_id, s.count_value, s.total, sum(p.total) as accumulated + from statsView s + join statsView p on s.gender_concept_id = p.gender_concept_id and p.rn <= s.rn + group by s.gender_concept_id, s.count_value, s.total, s.rn +) +select 106 as analysis_id, + CAST(o.gender_concept_id AS VARCHAR(255)) as gender_concept_id, + o.total as count_value, + o.min_value, + o.max_value, + o.avg_value, + o.stdev_value, + MIN(case when p.accumulated >= .50 * o.total then count_value end) as median_value, + MIN(case when p.accumulated >= .10 * o.total then count_value end) as p10_value, + MIN(case when p.accumulated >= .25 * o.total then count_value end) as p25_value, + MIN(case when p.accumulated >= .75 * o.total then count_value end) as p75_value, + MIN(case when p.accumulated >= .90 * o.total then count_value end) as p90_value +INTO #tempResults +from priorStats p +join overallStats o on p.gender_concept_id = o.gender_concept_id +GROUP BY o.gender_concept_id, o.total, o.min_value, o.max_value, o.avg_value, o.stdev_value +; + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select analysis_id, gender_concept_id as stratum_1, +null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, +count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_dist_106 +FROM #tempResults +; + +truncate table #rawData; +drop table #rawData; + +truncate table #tempResults; +drop table #tempResults; diff --git a/inst/sql/sql_server/analyses/107.sql b/inst/sql/sql_server/analyses/107.sql new file mode 100755 index 00000000..c353ae1e --- /dev/null +++ b/inst/sql/sql_server/analyses/107.sql @@ -0,0 +1,73 @@ +-- 107 Length of observation (days) of first observation period by age decile + +--HINT DISTRIBUTE_ON_KEY(age_decile) +with rawData (age_decile, count_value) as +( + select floor((year(op.OBSERVATION_PERIOD_START_DATE) - p.YEAR_OF_BIRTH)/10) as age_decile, + DATEDIFF(dd,op.observation_period_start_date, op.observation_period_end_date) as count_value + FROM + ( + select person_id, + op.observation_period_start_date, + op.observation_period_end_date, + ROW_NUMBER() over (PARTITION by op.person_id order by op.observation_period_start_date asc) as rn + from @cdmDatabaseSchema.OBSERVATION_PERIOD op + ) op + JOIN @cdmDatabaseSchema.PERSON p on op.person_id = p.person_id + where op.rn = 1 +), +overallStats (age_decile, avg_value, stdev_value, min_value, max_value, total) as +( + select age_decile, + CAST(avg(1.0 * count_value) AS FLOAT) as avg_value, + CAST(stdev(count_value) AS FLOAT) as stdev_value, + 
min(count_value) as min_value, + max(count_value) as max_value, + count_big(*) as total + from rawData + group by age_decile +), +statsView (age_decile, count_value, total, rn) as +( + select age_decile, + count_value, + count_big(*) as total, + row_number() over (order by count_value) as rn + FROM rawData + group by age_decile, count_value +), +priorStats (age_decile,count_value, total, accumulated) as +( + select s.age_decile, s.count_value, s.total, sum(p.total) as accumulated + from statsView s + join statsView p on s.age_decile = p.age_decile and p.rn <= s.rn + group by s.age_decile, s.count_value, s.total, s.rn +) +select 107 as analysis_id, + CAST(o.age_decile AS VARCHAR(255)) as age_decile, + o.total as count_value, + o.min_value, + o.max_value, + o.avg_value, + o.stdev_value, + MIN(case when p.accumulated >= .50 * o.total then count_value else o.max_value end) as median_value, + MIN(case when p.accumulated >= .10 * o.total then count_value else o.max_value end) as p10_value, + MIN(case when p.accumulated >= .25 * o.total then count_value else o.max_value end) as p25_value, + MIN(case when p.accumulated >= .75 * o.total then count_value else o.max_value end) as p75_value, + MIN(case when p.accumulated >= .90 * o.total then count_value else o.max_value end) as p90_value +into #tempResults +from priorStats p +join overallStats o on p.age_decile = o.age_decile +GROUP BY o.age_decile, o.total, o.min_value, o.max_value, o.avg_value, o.stdev_value +; + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select analysis_id, age_decile as stratum_1, +null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, +count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_dist_107 +FROM #tempResults +; + +truncate table #tempResults; +drop table #tempResults; diff --git a/inst/sql/sql_server/analyses/108.sql b/inst/sql/sql_server/analyses/108.sql new file mode 100755 index 00000000..b0906938 --- /dev/null +++ b/inst/sql/sql_server/analyses/108.sql @@ -0,0 +1,19 @@ +-- 108 Number of persons by length of observation period, in 30d increments + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select 108 as analysis_id, CAST(floor(DATEDIFF(dd, op1.observation_period_start_date, op1.observation_period_end_date)/30) AS VARCHAR(255)) as stratum_1, +null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, +COUNT_BIG(distinct p1.person_id) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_108 +from @cdmDatabaseSchema.PERSON p1 + inner join + (select person_id, + OBSERVATION_PERIOD_START_DATE, + OBSERVATION_PERIOD_END_DATE, + ROW_NUMBER() over (PARTITION by person_id order by observation_period_start_date asc) as rn1 + from @cdmDatabaseSchema.OBSERVATION_PERIOD + ) op1 + on p1.PERSON_ID = op1.PERSON_ID + where op1.rn1 = 1 +group by floor(DATEDIFF(dd, op1.observation_period_start_date, op1.observation_period_end_date)/30) +; diff --git a/inst/sql/sql_server/analyses/109.sql b/inst/sql/sql_server/analyses/109.sql new file mode 100755 index 00000000..efff4394 --- /dev/null +++ b/inst/sql/sql_server/analyses/109.sql @@ -0,0 +1,32 @@ +-- 109 Number of persons with continuous observation in each year +-- Note: using temp table instead of nested query because this gives vastly improved performance in Oracle + +--HINT DISTRIBUTE_ON_KEY(obs_year) +SELECT DISTINCT + YEAR(observation_period_start_date) AS obs_year, + 
DATEFROMPARTS(YEAR(observation_period_start_date), 1, 1) AS obs_year_start, + DATEFROMPARTS(YEAR(observation_period_start_date), 12, 31) AS obs_year_end +INTO + #temp_dates +FROM @cdmDatabaseSchema.observation_period +; + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +SELECT + 109 AS analysis_id, + CAST(obs_year AS VARCHAR(255)) AS stratum_1, + null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(DISTINCT person_id) AS count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_109 +FROM @cdmDatabaseSchema.observation_period, + #temp_dates +WHERE + observation_period_start_date <= obs_year_start + AND + observation_period_end_date >= obs_year_end +GROUP BY + obs_year +; + +TRUNCATE TABLE #temp_dates; +DROP TABLE #temp_dates; diff --git a/inst/sql/sql_server/analyses/11.sql b/inst/sql/sql_server/analyses/11.sql new file mode 100755 index 00000000..2ec18a7a --- /dev/null +++ b/inst/sql/sql_server/analyses/11.sql @@ -0,0 +1,11 @@ +-- 11 Number of non-deceased persons by year of birth and by gender + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select 11 as analysis_id, CAST(year_of_birth AS VARCHAR(255)) as stratum_1, + CAST(gender_concept_id AS VARCHAR(255)) as stratum_2, + null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(distinct person_id) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_11 +from @cdmDatabaseSchema.PERSON +where person_id not in (select person_id from @cdmDatabaseSchema.DEATH) +group by YEAR_OF_BIRTH, gender_concept_id; diff --git a/inst/sql/sql_server/analyses/110.sql b/inst/sql/sql_server/analyses/110.sql new file mode 100755 index 00000000..4c73755b --- /dev/null +++ b/inst/sql/sql_server/analyses/110.sql @@ -0,0 +1,25 @@ +-- 110 Number of persons with continuous observation in each month +-- Note: using temp table instead of nested query because this gives vastly improved performance in Oracle + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +SELECT + 110 as analysis_id, + CAST(t1.obs_month AS VARCHAR(255)) as stratum_1, + null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(distinct op1.PERSON_ID) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_110 +FROM +@cdmDatabaseSchema.observation_period op1 +join +( + SELECT DISTINCT + YEAR(observation_period_start_date)*100 + MONTH(observation_period_start_date) AS obs_month, + DATEFROMPARTS(YEAR(observation_period_start_date), MONTH(observation_period_start_date), 1) + AS obs_month_start, + EOMONTH(observation_period_start_date) AS obs_month_end + FROM @cdmDatabaseSchema.observation_period +) t1 on op1.observation_period_start_date <= t1.obs_month_start + and op1.observation_period_end_date >= t1.obs_month_end +group by t1.obs_month; + + diff --git a/inst/sql/sql_server/analyses/1100.sql b/inst/sql/sql_server/analyses/1100.sql new file mode 100755 index 00000000..963f8d4e --- /dev/null +++ b/inst/sql/sql_server/analyses/1100.sql @@ -0,0 +1,14 @@ +-- 1100 Number of persons by location 3-digit zip + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select 1100 as analysis_id, + CAST(left(l1.zip,3) AS VARCHAR(255)) as stratum_1, + null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(distinct person_id) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_1100 +from @cdmDatabaseSchema.PERSON p1 + inner join @cdmDatabaseSchema.LOCATION l1 + on p1.location_id = l1.location_id +where p1.location_id is not null + and l1.zip is not null +group by left(l1.zip,3); 
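Each analysis file in this changeset is written once in the SQL Server dialect and parameterized with SqlRender tokens (@cdmDatabaseSchema, @scratchDatabaseSchema, @schemaDelim, @tempAchillesPrefix). The following R sketch, which is not part of the diff, shows how a query like 1100.sql above could be rendered and translated at run time; the schema names, the table prefix, and the "postgresql" target are placeholder assumptions, and older SqlRender releases expose renderSql()/translateSql() instead of render()/translate().

library(SqlRender)

# Abbreviated copy of analysis 1100 (persons by 3-digit zip), using the same
# SqlRender tokens as the files under inst/sql/sql_server/analyses.
sql <- "
select 1100 as analysis_id,
       CAST(left(l1.zip, 3) AS VARCHAR(255)) as stratum_1,
       COUNT_BIG(distinct person_id) as count_value
into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_1100
from @cdmDatabaseSchema.PERSON p1
  inner join @cdmDatabaseSchema.LOCATION l1 on p1.location_id = l1.location_id
where l1.zip is not null
group by left(l1.zip, 3);"

# Fill in the tokens (the values here are illustrative, not Achilles defaults).
rendered <- render(sql,
                   warnOnMissingParameters = FALSE,
                   cdmDatabaseSchema       = "my_cdm",
                   scratchDatabaseSchema   = "my_scratch",
                   schemaDelim             = ".",
                   tempAchillesPrefix      = "tmpach")

# Translate the SQL Server dialect to the target DBMS before executing it
# with DatabaseConnector.
translated <- translate(rendered, targetDialect = "postgresql")
cat(translated)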
diff --git a/inst/sql/sql_server/analyses/1101.sql b/inst/sql/sql_server/analyses/1101.sql new file mode 100755 index 00000000..9f61f0b7 --- /dev/null +++ b/inst/sql/sql_server/analyses/1101.sql @@ -0,0 +1,14 @@ +-- 1101 Number of persons by location state + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select 1101 as analysis_id, + CAST(l1.state AS VARCHAR(255)) as stratum_1, + null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(distinct person_id) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_1101 +from @cdmDatabaseSchema.PERSON p1 + inner join @cdmDatabaseSchema.LOCATION l1 + on p1.location_id = l1.location_id +where p1.location_id is not null + and l1.state is not null +group by l1.state; diff --git a/inst/sql/sql_server/analyses/1102.sql b/inst/sql/sql_server/analyses/1102.sql new file mode 100755 index 00000000..b2eaafdd --- /dev/null +++ b/inst/sql/sql_server/analyses/1102.sql @@ -0,0 +1,14 @@ +-- 1102 Number of care sites by location 3-digit zip + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select 1102 as analysis_id, + CAST(left(l1.zip,3) AS VARCHAR(255)) as stratum_1, + null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(distinct care_site_id) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_1102 +from @cdmDatabaseSchema.care_site cs1 + inner join @cdmDatabaseSchema.LOCATION l1 + on cs1.location_id = l1.location_id +where cs1.location_id is not null + and l1.zip is not null +group by left(l1.zip,3); diff --git a/inst/sql/sql_server/analyses/1103.sql b/inst/sql/sql_server/analyses/1103.sql new file mode 100755 index 00000000..3bafe75f --- /dev/null +++ b/inst/sql/sql_server/analyses/1103.sql @@ -0,0 +1,14 @@ +-- 1103 Number of care sites by location state + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select 1103 as analysis_id, + CAST(l1.state AS VARCHAR(255)) as stratum_1, + null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(distinct care_site_id) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_1103 +from @cdmDatabaseSchema.care_site cs1 + inner join @cdmDatabaseSchema.LOCATION l1 + on cs1.location_id = l1.location_id +where cs1.location_id is not null + and l1.state is not null +group by l1.state; diff --git a/inst/sql/sql_server/analyses/111.sql b/inst/sql/sql_server/analyses/111.sql new file mode 100755 index 00000000..23d3884e --- /dev/null +++ b/inst/sql/sql_server/analyses/111.sql @@ -0,0 +1,12 @@ +-- 111 Number of persons by observation period start month + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select 111 as analysis_id, + CAST(YEAR(observation_period_start_date)*100 + month(OBSERVATION_PERIOD_START_DATE) AS VARCHAR(255)) as stratum_1, + null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(distinct op1.PERSON_ID) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_111 +from + @cdmDatabaseSchema.observation_period op1 +group by YEAR(observation_period_start_date)*100 + month(OBSERVATION_PERIOD_START_DATE) +; diff --git a/inst/sql/sql_server/analyses/112.sql b/inst/sql/sql_server/analyses/112.sql new file mode 100755 index 00000000..517904e4 --- /dev/null +++ b/inst/sql/sql_server/analyses/112.sql @@ -0,0 +1,12 @@ +-- 112 Number of persons by observation period end month + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select 112 as analysis_id, + CAST(YEAR(observation_period_end_date)*100 + month(observation_period_end_date) AS VARCHAR(255)) as stratum_1, + 
null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(distinct op1.PERSON_ID) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_112 +from + @cdmDatabaseSchema.observation_period op1 +group by YEAR(observation_period_end_date)*100 + month(observation_period_end_date) +; diff --git a/inst/sql/sql_server/analyses/113.sql b/inst/sql/sql_server/analyses/113.sql new file mode 100755 index 00000000..110231e8 --- /dev/null +++ b/inst/sql/sql_server/analyses/113.sql @@ -0,0 +1,12 @@ +-- 113 Number of persons by number of observation periods + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select 113 as analysis_id, + CAST(op1.num_periods AS VARCHAR(255)) as stratum_1, + null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(distinct op1.PERSON_ID) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_113 +from + (select person_id, COUNT_BIG(OBSERVATION_period_start_date) as num_periods from @cdmDatabaseSchema.OBSERVATION_PERIOD group by PERSON_ID) op1 +group by op1.num_periods +; diff --git a/inst/sql/sql_server/analyses/114.sql b/inst/sql/sql_server/analyses/114.sql new file mode 100755 index 00000000..6a032054 --- /dev/null +++ b/inst/sql/sql_server/analyses/114.sql @@ -0,0 +1,12 @@ +-- 114 Number of persons with observation period before year-of-birth + +select 114 as analysis_id, + null as stratum_1, null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(distinct p1.PERSON_ID) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_114 +from + @cdmDatabaseSchema.PERSON p1 + inner join (select person_id, MIN(year(OBSERVATION_period_start_date)) as first_obs_year from @cdmDatabaseSchema.OBSERVATION_PERIOD group by PERSON_ID) op1 + on p1.person_id = op1.person_id +where p1.year_of_birth > op1.first_obs_year +; diff --git a/inst/sql/sql_server/analyses/115.sql b/inst/sql/sql_server/analyses/115.sql new file mode 100755 index 00000000..655f0473 --- /dev/null +++ b/inst/sql/sql_server/analyses/115.sql @@ -0,0 +1,10 @@ +-- 115 Number of persons with observation period end < start + +select 115 as analysis_id, + null as stratum_1, null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(op1.PERSON_ID) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_115 +from + @cdmDatabaseSchema.observation_period op1 +where op1.observation_period_end_date < op1.observation_period_start_date +; diff --git a/inst/sql/sql_server/analyses/116.sql b/inst/sql/sql_server/analyses/116.sql new file mode 100755 index 00000000..d84810cb --- /dev/null +++ b/inst/sql/sql_server/analyses/116.sql @@ -0,0 +1,37 @@ +-- 116 Number of persons with at least one day of observation in each year by gender and age decile +-- Note: using temp table instead of nested query because this gives vastly improved performance in Oracle + + + +select distinct + YEAR(observation_period_start_date) as obs_year +INTO + #temp_dates +from + @cdmDatabaseSchema.OBSERVATION_PERIOD +; + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select 116 as analysis_id, + CAST(t1.obs_year AS VARCHAR(255)) as stratum_1, + CAST(p1.gender_concept_id AS VARCHAR(255)) as stratum_2, + CAST(floor((t1.obs_year - p1.year_of_birth)/10) AS VARCHAR(255)) as stratum_3, + null as stratum_4, null as stratum_5, + COUNT_BIG(distinct p1.PERSON_ID) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_116 +from + @cdmDatabaseSchema.PERSON p1 + inner join + 
@cdmDatabaseSchema.observation_period op1 + on p1.person_id = op1.person_id + , + #temp_dates t1 +where year(op1.OBSERVATION_PERIOD_START_DATE) <= t1.obs_year + and year(op1.OBSERVATION_PERIOD_END_DATE) >= t1.obs_year +group by t1.obs_year, + p1.gender_concept_id, + floor((t1.obs_year - p1.year_of_birth)/10) +; + +TRUNCATE TABLE #temp_dates; +DROP TABLE #temp_dates; diff --git a/inst/sql/sql_server/analyses/117.sql b/inst/sql/sql_server/analyses/117.sql new file mode 100755 index 00000000..e95473c4 --- /dev/null +++ b/inst/sql/sql_server/analyses/117.sql @@ -0,0 +1,21 @@ +-- 117 Number of persons with at least one day of observation in each month +-- Note: using temp table instead of nested query because this gives vastly improved performance in Oracle + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +SELECT + 117 as analysis_id, + CAST(t1.obs_month AS VARCHAR(255)) as stratum_1, + null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(distinct op1.PERSON_ID) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_117 +FROM +@cdmDatabaseSchema.observation_period op1 +join +( + select distinct + YEAR(observation_period_start_date)*100 + MONTH(observation_period_start_date) as obs_month + from + @cdmDatabaseSchema.OBSERVATION_PERIOD +) t1 on YEAR(op1.observation_period_start_date)*100 + MONTH(op1.observation_period_start_date) <= t1.obs_month + and YEAR(op1.observation_period_end_date)*100 + MONTH(op1.observation_period_end_date) >= t1.obs_month +group by t1.obs_month; \ No newline at end of file diff --git a/inst/sql/sql_server/analyses/118.sql b/inst/sql/sql_server/analyses/118.sql new file mode 100755 index 00000000..9cb26a4f --- /dev/null +++ b/inst/sql/sql_server/analyses/118.sql @@ -0,0 +1,12 @@ +-- 118 Number of observation period records with invalid person_id + +select 118 as analysis_id, + null as stratum_1, null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(op1.PERSON_ID) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_118 +from + @cdmDatabaseSchema.observation_period op1 + left join @cdmDatabaseSchema.PERSON p1 + on p1.person_id = op1.person_id +where p1.person_id is null +; diff --git a/inst/sql/sql_server/analyses/119.sql b/inst/sql/sql_server/analyses/119.sql new file mode 100755 index 00000000..fa32f5ae --- /dev/null +++ b/inst/sql/sql_server/analyses/119.sql @@ -0,0 +1,12 @@ +-- 119 Number of observation period records by period_type_concept_id + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select 119 as analysis_id, + CAST(op1.period_type_concept_id AS VARCHAR(255)) as stratum_1, + null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(*) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_119 +from + @cdmDatabaseSchema.observation_period op1 +group by op1.period_type_concept_id +; diff --git a/inst/sql/sql_server/analyses/12.sql b/inst/sql/sql_server/analyses/12.sql new file mode 100755 index 00000000..2f6bcf53 --- /dev/null +++ b/inst/sql/sql_server/analyses/12.sql @@ -0,0 +1,9 @@ +-- 12 Number of persons by race and ethnicity + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select 12 as analysis_id, CAST(RACE_CONCEPT_ID AS VARCHAR(255)) as stratum_1, CAST(ETHNICITY_CONCEPT_ID AS VARCHAR(255)) as stratum_2, +null as stratum_3, null as stratum_4, null as stratum_5, +COUNT_BIG(distinct person_id) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_12 +from @cdmDatabaseSchema.PERSON
+group by RACE_CONCEPT_ID,ETHNICITY_CONCEPT_ID; diff --git a/inst/sql/sql_server/analyses/1200.sql b/inst/sql/sql_server/analyses/1200.sql new file mode 100755 index 00000000..e0e7ec7c --- /dev/null +++ b/inst/sql/sql_server/analyses/1200.sql @@ -0,0 +1,14 @@ +-- 1200 Number of persons by place of service + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select 1200 as analysis_id, + CAST(cs1.place_of_service_concept_id AS VARCHAR(255)) as stratum_1, + null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(person_id) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_1200 +from @cdmDatabaseSchema.PERSON p1 + inner join @cdmDatabaseSchema.care_site cs1 + on p1.care_site_id = cs1.care_site_id +where p1.care_site_id is not null + and cs1.place_of_service_concept_id is not null +group by cs1.place_of_service_concept_id; diff --git a/inst/sql/sql_server/analyses/1201.sql b/inst/sql/sql_server/analyses/1201.sql new file mode 100755 index 00000000..b20ba0be --- /dev/null +++ b/inst/sql/sql_server/analyses/1201.sql @@ -0,0 +1,14 @@ +-- 1201 Number of visits by place of service + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select 1201 as analysis_id, + CAST(cs1.place_of_service_concept_id AS VARCHAR(255)) as stratum_1, + null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(visit_occurrence_id) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_1201 +from @cdmDatabaseSchema.visit_occurrence vo1 + inner join @cdmDatabaseSchema.care_site cs1 + on vo1.care_site_id = cs1.care_site_id +where vo1.care_site_id is not null + and cs1.place_of_service_concept_id is not null +group by cs1.place_of_service_concept_id; diff --git a/inst/sql/sql_server/analyses/1202.sql b/inst/sql/sql_server/analyses/1202.sql new file mode 100755 index 00000000..c1a00069 --- /dev/null +++ b/inst/sql/sql_server/analyses/1202.sql @@ -0,0 +1,11 @@ +-- 1202 Number of care sites by place of service + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select 1202 as analysis_id, + CAST(cs1.place_of_service_concept_id AS VARCHAR(255)) as stratum_1, + null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(care_site_id) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_1202 +from @cdmDatabaseSchema.care_site cs1 +where cs1.place_of_service_concept_id is not null +group by cs1.place_of_service_concept_id; diff --git a/inst/sql/sql_server/analyses/1406.sql b/inst/sql/sql_server/analyses/1406.sql new file mode 100755 index 00000000..23e1f668 --- /dev/null +++ b/inst/sql/sql_server/analyses/1406.sql @@ -0,0 +1,73 @@ +-- 1406 Length of payer plan (days) of first payer plan period by gender + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +with rawData(stratum1_id, count_value) as +( + select p1.gender_concept_id as stratum1_id, + DATEDIFF(dd,ppp1.payer_plan_period_start_date, ppp1.payer_plan_period_end_date) as count_value + from @cdmDatabaseSchema.PERSON p1 + inner join + (select person_id, + payer_plan_period_START_DATE, + payer_plan_period_END_DATE, + ROW_NUMBER() over (PARTITION by person_id order by payer_plan_period_start_date asc) as rn1 + from @cdmDatabaseSchema.payer_plan_period + ) ppp1 + on p1.PERSON_ID = ppp1.PERSON_ID + where ppp1.rn1 = 1 +), +overallStats (stratum1_id, avg_value, stdev_value, min_value, max_value, total) as +( + select stratum1_id, + CAST(avg(1.0 * count_value) AS FLOAT) as avg_value, + CAST(stdev(count_value) AS FLOAT) as stdev_value, + min(count_value) as min_value, + 
max(count_value) as max_value, + count_big(*) as total + from rawData + group by stratum1_id +), +statsView (stratum1_id, count_value, total, rn) as +( + select stratum1_id, + count_value, + count_big(*) as total, + row_number() over (partition by stratum1_id order by count_value) as rn + FROM rawData + group by stratum1_id, count_value +), +priorStats (stratum1_id, count_value, total, accumulated) as +( + select s.stratum1_id, s.count_value, s.total, sum(p.total) as accumulated + from statsView s + join statsView p on s.stratum1_id = p.stratum1_id and p.rn <= s.rn + group by s.stratum1_id, s.count_value, s.total, s.rn +) +select 1406 as analysis_id, + CAST(p.stratum1_id AS VARCHAR(255)) as stratum_1, + o.total as count_value, + o.min_value, + o.max_value, + o.avg_value, + o.stdev_value, + MIN(case when p.accumulated >= .50 * o.total then count_value else o.max_value end) as median_value, + MIN(case when p.accumulated >= .10 * o.total then count_value else o.max_value end) as p10_value, + MIN(case when p.accumulated >= .25 * o.total then count_value else o.max_value end) as p25_value, + MIN(case when p.accumulated >= .75 * o.total then count_value else o.max_value end) as p75_value, + MIN(case when p.accumulated >= .90 * o.total then count_value else o.max_value end) as p90_value +into #tempResults +from priorStats p +join overallStats o on p.stratum1_id = o.stratum1_id +GROUP BY p.stratum1_id, o.total, o.min_value, o.max_value, o.avg_value, o.stdev_value +; + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select analysis_id, stratum_1, +null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, +count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_dist_1406 +from #tempResults +; + +truncate table #tempResults; +drop table #tempResults; diff --git a/inst/sql/sql_server/analyses/1407.sql b/inst/sql/sql_server/analyses/1407.sql new file mode 100755 index 00000000..76904830 --- /dev/null +++ b/inst/sql/sql_server/analyses/1407.sql @@ -0,0 +1,70 @@ +-- 1407 Length of payer plan (days) of first payer plan period by age decile + +--HINT DISTRIBUTE_ON_KEY(stratum_id) +with rawData(stratum_id, count_value) as +( + select floor((year(ppp1.payer_plan_period_START_DATE) - p1.YEAR_OF_BIRTH)/10) as stratum_id, + DATEDIFF(dd,ppp1.payer_plan_period_start_date, ppp1.payer_plan_period_end_date) as count_value + from @cdmDatabaseSchema.PERSON p1 + inner join + (select person_id, + payer_plan_period_START_DATE, + payer_plan_period_END_DATE, + ROW_NUMBER() over (PARTITION by person_id order by payer_plan_period_start_date asc) as rn1 + from @cdmDatabaseSchema.payer_plan_period + ) ppp1 + on p1.PERSON_ID = ppp1.PERSON_ID + where ppp1.rn1 = 1 +), +overallStats (stratum_id, avg_value, stdev_value, min_value, max_value, total) as +( + select stratum_id, + CAST(avg(1.0 * count_value) AS FLOAT) as avg_value, + CAST(stdev(count_value) AS FLOAT) as stdev_value, + min(count_value) as min_value, + max(count_value) as max_value, + count_big(*) as total + FROM rawData + group by stratum_id +), +statsView (stratum_id, count_value, total, rn) as +( + select stratum_id, count_value, count_big(*) as total, row_number() over (order by count_value) as rn + FROM rawData + group by stratum_id, count_value +), +priorStats (stratum_id, count_value, total, accumulated) as +( + select s.stratum_id, s.count_value, s.total, sum(p.total) as accumulated + from statsView s + join statsView p on s.stratum_id = 
p.stratum_id and p.rn <= s.rn + group by s.stratum_id, s.count_value, s.total, s.rn +) +select 1407 as analysis_id, + CAST(o.stratum_id AS VARCHAR(255)) AS stratum_id, + o.total as count_value, + o.min_value, + o.max_value, + o.avg_value, + o.stdev_value, + MIN(case when p.accumulated >= .50 * o.total then count_value else o.max_value end) as median_value, + MIN(case when p.accumulated >= .10 * o.total then count_value else o.max_value end) as p10_value, + MIN(case when p.accumulated >= .25 * o.total then count_value else o.max_value end) as p25_value, + MIN(case when p.accumulated >= .75 * o.total then count_value else o.max_value end) as p75_value, + MIN(case when p.accumulated >= .90 * o.total then count_value else o.max_value end) as p90_value +into #tempResults +from priorStats p +join overallStats o on p.stratum_id = o.stratum_id +GROUP BY o.stratum_id, o.total, o.min_value, o.max_value, o.avg_value, o.stdev_value +; + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select analysis_id, stratum_id as stratum_1, +null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, +count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_dist_1407 +from #tempResults +; + +truncate table #tempResults; +drop table #tempResults; diff --git a/inst/sql/sql_server/analyses/1408.sql b/inst/sql/sql_server/analyses/1408.sql new file mode 100755 index 00000000..b21431f8 --- /dev/null +++ b/inst/sql/sql_server/analyses/1408.sql @@ -0,0 +1,20 @@ +-- 1408 Number of persons by length of payer plan period, in 30d increments + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select 1408 as analysis_id, + CAST(floor(DATEDIFF(dd, ppp1.payer_plan_period_start_date, ppp1.payer_plan_period_end_date)/30) AS VARCHAR(255)) as stratum_1, + null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(distinct p1.person_id) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_1408 +from @cdmDatabaseSchema.PERSON p1 + inner join + (select person_id, + payer_plan_period_START_DATE, + payer_plan_period_END_DATE, + ROW_NUMBER() over (PARTITION by person_id order by payer_plan_period_start_date asc) as rn1 + from @cdmDatabaseSchema.payer_plan_period + ) ppp1 + on p1.PERSON_ID = ppp1.PERSON_ID + where ppp1.rn1 = 1 +group by CAST(floor(DATEDIFF(dd, ppp1.payer_plan_period_start_date, ppp1.payer_plan_period_end_date)/30) AS VARCHAR(255)) +; diff --git a/inst/sql/sql_server/analyses/1409.sql b/inst/sql/sql_server/analyses/1409.sql new file mode 100755 index 00000000..009076d4 --- /dev/null +++ b/inst/sql/sql_server/analyses/1409.sql @@ -0,0 +1,33 @@ +-- 1409 Number of persons with continuous payer plan in each year +-- Note: using temp table instead of nested query because this gives vastly improved + + + +select distinct + YEAR(payer_plan_period_start_date) as obs_year +INTO + #temp_dates +from + @cdmDatabaseSchema.payer_plan_period +; + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select 1409 as analysis_id, + CAST(t1.obs_year AS VARCHAR(255)) as stratum_1, + null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(distinct p1.PERSON_ID) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_1409 +from + @cdmDatabaseSchema.PERSON p1 + inner join + @cdmDatabaseSchema.payer_plan_period ppp1 + on p1.person_id = ppp1.person_id + , + #temp_dates t1 +where year(ppp1.payer_plan_period_START_DATE) <= t1.obs_year + and 
year(ppp1.payer_plan_period_END_DATE) >= t1.obs_year +group by t1.obs_year +; + +truncate table #temp_dates; +drop table #temp_dates; diff --git a/inst/sql/sql_server/analyses/1410.sql b/inst/sql/sql_server/analyses/1410.sql new file mode 100755 index 00000000..33e1ef4a --- /dev/null +++ b/inst/sql/sql_server/analyses/1410.sql @@ -0,0 +1,35 @@ +-- 1410 Number of persons with continuous payer plan in each month +-- Note: using temp table instead of nested query because this gives vastly improved performance in Oracle + +--HINT DISTRIBUTE_ON_KEY(obs_month) +SELECT DISTINCT + YEAR(payer_plan_period_start_date)*100 + MONTH(payer_plan_period_start_date) AS obs_month, + DATEFROMPARTS(YEAR(payer_plan_period_start_date), MONTH(payer_plan_period_start_date), 1) as obs_month_start, + EOMONTH(payer_plan_period_start_date) as obs_month_end +INTO + #temp_dates +FROM + @cdmDatabaseSchema.payer_plan_period +; + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select + 1410 as analysis_id, + CAST(obs_month AS VARCHAR(255)) as stratum_1, + null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(distinct p1.PERSON_ID) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_1410 +from + @cdmDatabaseSchema.PERSON p1 + inner join + @cdmDatabaseSchema.payer_plan_period ppp1 + on p1.person_id = ppp1.person_id + , + #temp_dates +where ppp1.payer_plan_period_START_DATE <= obs_month_start + and ppp1.payer_plan_period_END_DATE >= obs_month_end +group by obs_month +; + +TRUNCATE TABLE #temp_dates; +DROP TABLE #temp_dates; diff --git a/inst/sql/sql_server/analyses/1411.sql b/inst/sql/sql_server/analyses/1411.sql new file mode 100755 index 00000000..aabf79eb --- /dev/null +++ b/inst/sql/sql_server/analyses/1411.sql @@ -0,0 +1,14 @@ +-- 1411 Number of persons by payer plan period start month + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select 1411 as analysis_id, + DATEFROMPARTS(YEAR(payer_plan_period_start_date), MONTH(payer_plan_period_start_date), 1) as stratum_1, + null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(distinct p1.PERSON_ID) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_1411 +from + @cdmDatabaseSchema.PERSON p1 + inner join @cdmDatabaseSchema.payer_plan_period ppp1 + on p1.person_id = ppp1.person_id +group by DATEFROMPARTS(YEAR(payer_plan_period_start_date), MONTH(payer_plan_period_start_date), 1) +; diff --git a/inst/sql/sql_server/analyses/1412.sql b/inst/sql/sql_server/analyses/1412.sql new file mode 100755 index 00000000..ca2867c1 --- /dev/null +++ b/inst/sql/sql_server/analyses/1412.sql @@ -0,0 +1,14 @@ +-- 1412 Number of persons by payer plan period end month + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select 1412 as analysis_id, + DATEFROMPARTS(YEAR(payer_plan_period_end_date), MONTH(payer_plan_period_end_date), 1) as stratum_1, + null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(distinct p1.PERSON_ID) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_1412 +from + @cdmDatabaseSchema.PERSON p1 + inner join @cdmDatabaseSchema.payer_plan_period ppp1 + on p1.person_id = ppp1.person_id +group by DATEFROMPARTS(YEAR(payer_plan_period_end_date), MONTH(payer_plan_period_end_date), 1) +; diff --git a/inst/sql/sql_server/analyses/1413.sql b/inst/sql/sql_server/analyses/1413.sql new file mode 100755 index 00000000..3283b62e --- /dev/null +++ b/inst/sql/sql_server/analyses/1413.sql @@ -0,0 +1,14 @@ +-- 1413 Number of persons by number of payer
plan periods + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select 1413 as analysis_id, + CAST(ppp1.num_periods AS VARCHAR(255)) as stratum_1, + null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(distinct p1.PERSON_ID) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_1413 +from + @cdmDatabaseSchema.PERSON p1 + inner join (select person_id, COUNT_BIG(payer_plan_period_start_date) as num_periods from @cdmDatabaseSchema.payer_plan_period group by PERSON_ID) ppp1 + on p1.person_id = ppp1.person_id +group by ppp1.num_periods +; diff --git a/inst/sql/sql_server/analyses/1414.sql b/inst/sql/sql_server/analyses/1414.sql new file mode 100755 index 00000000..745f4994 --- /dev/null +++ b/inst/sql/sql_server/analyses/1414.sql @@ -0,0 +1,13 @@ +-- 1414 Number of persons with payer plan period before year-of-birth + + +select 1414 as analysis_id, + null as stratum_1, null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(distinct p1.PERSON_ID) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_1414 +from + @cdmDatabaseSchema.PERSON p1 + inner join (select person_id, MIN(year(payer_plan_period_start_date)) as first_obs_year from @cdmDatabaseSchema.payer_plan_period group by PERSON_ID) ppp1 + on p1.person_id = ppp1.person_id +where p1.year_of_birth > ppp1.first_obs_year +; diff --git a/inst/sql/sql_server/analyses/1415.sql b/inst/sql/sql_server/analyses/1415.sql new file mode 100755 index 00000000..e249ab4c --- /dev/null +++ b/inst/sql/sql_server/analyses/1415.sql @@ -0,0 +1,11 @@ +-- 1415 Number of persons with payer plan period end < start + + +select 1415 as analysis_id, + null as stratum_1, null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(ppp1.PERSON_ID) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_1415 +from + @cdmDatabaseSchema.payer_plan_period ppp1 +where ppp1.payer_plan_period_end_date < ppp1.payer_plan_period_start_date +; diff --git a/inst/sql/sql_server/analyses/1500.sql b/inst/sql/sql_server/analyses/1500.sql new file mode 100755 index 00000000..fa6afdf4 --- /dev/null +++ b/inst/sql/sql_server/analyses/1500.sql @@ -0,0 +1,30 @@ +-- 1500 Number of drug cost records with invalid drug exposure id + +{cdmVersion == '5'}?{ + + +select 1500 as analysis_id, + null as stratum_1, null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(dc1.drug_cost_ID) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_1500 +from + @cdmDatabaseSchema.drug_cost dc1 + left join @cdmDatabaseSchema.drug_exposure de1 + on dc1.drug_exposure_id = de1.drug_exposure_id +where de1.drug_exposure_id is null +; + +}:{ + +select 1500 as analysis_id, + null as stratum_1, null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(dc1.cost_id) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_1500 +from + @cdmDatabaseSchema.cost dc1 + left join @cdmDatabaseSchema.drug_exposure de1 + on dc1.cost_event_id = de1.drug_exposure_id +where de1.drug_exposure_id is null +and dc1.cost_domain_id = 'Drug' +; +} diff --git a/inst/sql/sql_server/analyses/1501.sql b/inst/sql/sql_server/analyses/1501.sql new file mode 100755 index 00000000..20c86f50 --- /dev/null +++ b/inst/sql/sql_server/analyses/1501.sql @@ -0,0 +1,30 @@ +-- 1501 Number of drug cost records with invalid payer plan period id + +{cdmVersion == '5'}?{ + + select 1501 as analysis_id, + null as 
stratum_1, null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(dc1.drug_cost_ID) as count_value + into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_1501 + from + @cdmDatabaseSchema.drug_cost dc1 + left join @cdmDatabaseSchema.payer_plan_period ppp1 + on dc1.payer_plan_period_id = ppp1.payer_plan_period_id + where dc1.payer_plan_period_id is not null + and ppp1.payer_plan_period_id is null + ; + +}:{ + select 1501 as analysis_id, + null as stratum_1, null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(dc1.cost_id) as count_value + into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_1501 + from + @cdmDatabaseSchema.cost dc1 + left join @cdmDatabaseSchema.payer_plan_period ppp1 + on dc1.payer_plan_period_id = ppp1.payer_plan_period_id + where dc1.payer_plan_period_id is not null + and ppp1.payer_plan_period_id is null + and dc1.cost_domain_id = 'Drug' + ; +} diff --git a/inst/sql/sql_server/analyses/1600.sql b/inst/sql/sql_server/analyses/1600.sql new file mode 100755 index 00000000..9b7856ae --- /dev/null +++ b/inst/sql/sql_server/analyses/1600.sql @@ -0,0 +1,28 @@ +-- 1600 Number of procedure cost records with invalid procedure exposure id + +{cdmVersion == '5'}?{ + + select 1600 as analysis_id, + null as stratum_1, null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(pc1.procedure_cost_ID) as count_value + into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_1600 + from + @cdmDatabaseSchema.procedure_cost pc1 + left join @cdmDatabaseSchema.procedure_occurrence po1 + on pc1.procedure_occurrence_id = po1.procedure_occurrence_id + where po1.procedure_occurrence_id is null + ; +}:{ + + select 1600 as analysis_id, + null as stratum_1, null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(pc1.cost_id) as count_value + into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_1600 + from + @cdmDatabaseSchema.cost pc1 + left join @cdmDatabaseSchema.procedure_occurrence po1 + on pc1.cost_event_id = po1.procedure_occurrence_id + where po1.procedure_occurrence_id is null + and pc1.cost_domain_id = 'Procedure' + ; +} diff --git a/inst/sql/sql_server/analyses/1601.sql b/inst/sql/sql_server/analyses/1601.sql new file mode 100755 index 00000000..c0d321ae --- /dev/null +++ b/inst/sql/sql_server/analyses/1601.sql @@ -0,0 +1,30 @@ +-- 1601 Number of procedure cost records with invalid payer plan period id + +{cdmVersion == '5'}?{ + + select 1601 as analysis_id, + null as stratum_1, null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(pc1.procedure_cost_ID) as count_value + into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_1601 + from + @cdmDatabaseSchema.procedure_cost pc1 + left join @cdmDatabaseSchema.payer_plan_period ppp1 + on pc1.payer_plan_period_id = ppp1.payer_plan_period_id + where pc1.payer_plan_period_id is not null + and ppp1.payer_plan_period_id is null + ; +}:{ + + select 1601 as analysis_id, + null as stratum_1, null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(pc1.cost_id) as count_value + into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_1601 + from + @cdmDatabaseSchema.cost pc1 + left join @cdmDatabaseSchema.payer_plan_period ppp1 + on pc1.payer_plan_period_id = ppp1.payer_plan_period_id + where pc1.payer_plan_period_id is not null + and ppp1.payer_plan_period_id is null + and pc1.cost_domain_id = 'Procedure' + ; +} diff --git 
a/inst/sql/sql_server/analyses/1610.sql b/inst/sql/sql_server/analyses/1610.sql new file mode 100755 index 00000000..c2fac084 --- /dev/null +++ b/inst/sql/sql_server/analyses/1610.sql @@ -0,0 +1,32 @@ +-- 1610 Number of records by revenue_code_concept_id + +{cdmVersion == '5'}?{ + + --HINT DISTRIBUTE_ON_KEY(stratum_1) + select 1610 as analysis_id, + CAST(revenue_code_concept_id AS VARCHAR(255)) as stratum_1, + null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(pc1.procedure_cost_ID) as count_value + into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_1610 + from + @cdmDatabaseSchema.procedure_cost pc1 + where revenue_code_concept_id is not null + group by revenue_code_concept_id + ; + +}:{ + + --HINT DISTRIBUTE_ON_KEY(stratum_1) + select 1610 as analysis_id, + CAST(revenue_code_concept_id AS VARCHAR(255)) as stratum_1, + null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(pc1.cost_id) as count_value + into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_1610 + from + @cdmDatabaseSchema.cost pc1 + where revenue_code_concept_id is not null + and pc1.cost_domain_id = 'Procedure' + group by revenue_code_concept_id + ; + +} diff --git a/inst/sql/sql_server/analyses/1700.sql b/inst/sql/sql_server/analyses/1700.sql new file mode 100755 index 00000000..4965e365 --- /dev/null +++ b/inst/sql/sql_server/analyses/1700.sql @@ -0,0 +1,12 @@ +-- 1700 Number of records by cohort_concept_id + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select 1700 as analysis_id, + CAST(cohort_definition_id AS VARCHAR(255)) as stratum_1, + null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(subject_ID) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_1700 +from + @resultsDatabaseSchema.cohort c1 +group by cohort_definition_id +; diff --git a/inst/sql/sql_server/analyses/1701.sql b/inst/sql/sql_server/analyses/1701.sql new file mode 100755 index 00000000..2a7960f0 --- /dev/null +++ b/inst/sql/sql_server/analyses/1701.sql @@ -0,0 +1,11 @@ +-- 1701 Number of records with cohort end date < cohort start date + + +select 1701 as analysis_id, + null as stratum_1, null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(subject_ID) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_1701 +from + @resultsDatabaseSchema.cohort c1 +where c1.cohort_end_date < c1.cohort_start_date +; diff --git a/inst/sql/sql_server/analyses/1800.sql b/inst/sql/sql_server/analyses/1800.sql new file mode 100755 index 00000000..ee83d1af --- /dev/null +++ b/inst/sql/sql_server/analyses/1800.sql @@ -0,0 +1,12 @@ +-- 1800 Number of persons with at least one measurement occurrence, by measurement_concept_id + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select 1800 as analysis_id, + CAST(m.measurement_CONCEPT_ID AS VARCHAR(255)) as stratum_1, + null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(distinct m.PERSON_ID) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_1800 +from + @cdmDatabaseSchema.measurement m +group by m.measurement_CONCEPT_ID +; diff --git a/inst/sql/sql_server/analyses/1801.sql b/inst/sql/sql_server/analyses/1801.sql new file mode 100755 index 00000000..1982cec9 --- /dev/null +++ b/inst/sql/sql_server/analyses/1801.sql @@ -0,0 +1,12 @@ +-- 1801 Number of measurement occurrence records, by measurement_concept_id + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select 1801 as analysis_id, + 
CAST(m.measurement_concept_id AS VARCHAR(255)) as stratum_1, + null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(m.PERSON_ID) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_1801 +from + @cdmDatabaseSchema.measurement m +group by m.measurement_CONCEPT_ID +; diff --git a/inst/sql/sql_server/analyses/1802.sql b/inst/sql/sql_server/analyses/1802.sql new file mode 100755 index 00000000..8f37cd42 --- /dev/null +++ b/inst/sql/sql_server/analyses/1802.sql @@ -0,0 +1,14 @@ +-- 1802 Number of persons by measurement occurrence start month, by measurement_concept_id + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select 1802 as analysis_id, + CAST(m.measurement_concept_id AS VARCHAR(255)) as stratum_1, + CAST(YEAR(measurement_date)*100 + month(measurement_date) AS VARCHAR(255)) as stratum_2, + null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(distinct PERSON_ID) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_1802 +from + @cdmDatabaseSchema.measurement m +group by m.measurement_concept_id, + YEAR(measurement_date)*100 + month(measurement_date) +; diff --git a/inst/sql/sql_server/analyses/1803.sql b/inst/sql/sql_server/analyses/1803.sql new file mode 100755 index 00000000..196259d3 --- /dev/null +++ b/inst/sql/sql_server/analyses/1803.sql @@ -0,0 +1,66 @@ +-- 1803 Number of distinct measurement occurrence concepts per person + +--HINT DISTRIBUTE_ON_KEY(count_value) +with rawData(count_value) as +( + select num_measurements as count_value + from + ( + select m.person_id, COUNT_BIG(distinct m.measurement_concept_id) as num_measurements + from + @cdmDatabaseSchema.measurement m + group by m.person_id + ) t0 +), +overallStats (avg_value, stdev_value, min_value, max_value, total) as +( + select CAST(avg(1.0 * count_value) AS FLOAT) as avg_value, + CAST(stdev(count_value) AS FLOAT) as stdev_value, + min(count_value) as min_value, + max(count_value) as max_value, + count_big(*) as total + from rawData +), +statsView (count_value, total, rn) as +( + select count_value, + count_big(*) as total, + row_number() over (order by count_value) as rn + FROM rawData + group by count_value +), +priorStats (count_value, total, accumulated) as +( + select s.count_value, s.total, sum(p.total) as accumulated + from statsView s + join statsView p on p.rn <= s.rn + group by s.count_value, s.total, s.rn +) +select 1803 as analysis_id, + o.total as count_value, + o.min_value, + o.max_value, + o.avg_value, + o.stdev_value, + MIN(case when p.accumulated >= .50 * o.total then count_value else o.max_value end) as median_value, + MIN(case when p.accumulated >= .10 * o.total then count_value else o.max_value end) as p10_value, + MIN(case when p.accumulated >= .25 * o.total then count_value else o.max_value end) as p25_value, + MIN(case when p.accumulated >= .75 * o.total then count_value else o.max_value end) as p75_value, + MIN(case when p.accumulated >= .90 * o.total then count_value else o.max_value end) as p90_value +into #tempResults +from priorStats p +CROSS JOIN overallStats o +GROUP BY o.total, o.min_value, o.max_value, o.avg_value, o.stdev_value +; + +--HINT DISTRIBUTE_ON_KEY(count_value) +select analysis_id, +null as stratum_1, null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, +count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_dist_1803 +from #tempResults +; + +truncate 
table #tempResults; + +drop table #tempResults; diff --git a/inst/sql/sql_server/analyses/1804.sql b/inst/sql/sql_server/analyses/1804.sql new file mode 100755 index 00000000..2651e4ca --- /dev/null +++ b/inst/sql/sql_server/analyses/1804.sql @@ -0,0 +1,18 @@ +-- 1804 Number of persons with at least one measurement occurrence, by measurement_concept_id by calendar year by gender by age decile + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select 1804 as analysis_id, + CAST(m.measurement_concept_id AS VARCHAR(255)) as stratum_1, + CAST(YEAR(measurement_date) AS VARCHAR(255)) as stratum_2, + CAST(p1.gender_concept_id AS VARCHAR(255)) as stratum_3, + CAST(floor((year(measurement_date) - p1.year_of_birth)/10) AS VARCHAR(255)) as stratum_4, + null as stratum_5, + COUNT_BIG(distinct p1.PERSON_ID) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_1804 +from @cdmDatabaseSchema.PERSON p1 +inner join @cdmDatabaseSchema.measurement m on p1.person_id = m.person_id +group by m.measurement_concept_id, + YEAR(measurement_date), + p1.gender_concept_id, + floor((year(measurement_date) - p1.year_of_birth)/10) +; diff --git a/inst/sql/sql_server/analyses/1805.sql b/inst/sql/sql_server/analyses/1805.sql new file mode 100755 index 00000000..a2c50430 --- /dev/null +++ b/inst/sql/sql_server/analyses/1805.sql @@ -0,0 +1,13 @@ +-- 1805 Number of measurement records, by measurement_concept_id by measurement_type_concept_id + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select 1805 as analysis_id, + CAST(m.measurement_concept_id AS VARCHAR(255)) as stratum_1, + CAST(m.measurement_type_concept_id AS VARCHAR(255)) as stratum_2, + null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(m.PERSON_ID) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_1805 +from @cdmDatabaseSchema.measurement m +group by m.measurement_concept_id, + m.measurement_type_concept_id +; diff --git a/inst/sql/sql_server/analyses/1806.sql b/inst/sql/sql_server/analyses/1806.sql new file mode 100755 index 00000000..cd23d0f0 --- /dev/null +++ b/inst/sql/sql_server/analyses/1806.sql @@ -0,0 +1,75 @@ +-- 1806 Distribution of age by measurement_concept_id + +--HINT DISTRIBUTE_ON_KEY(subject_id) +select o1.measurement_concept_id as subject_id, + p1.gender_concept_id, + o1.measurement_start_year - p1.year_of_birth as count_value +INTO #rawData_1806 +from @cdmDatabaseSchema.PERSON p1 +inner join +( + select person_id, measurement_concept_id, min(year(measurement_date)) as measurement_start_year + from @cdmDatabaseSchema.measurement + group by person_id, measurement_concept_id +) o1 +on p1.person_id = o1.person_id +; + +--HINT DISTRIBUTE_ON_KEY(stratum1_id) +with overallStats (stratum1_id, stratum2_id, avg_value, stdev_value, min_value, max_value, total) as +( + select subject_id as stratum1_id, + gender_concept_id as stratum2_id, + CAST(avg(1.0 * count_value) AS FLOAT) as avg_value, + CAST(stdev(count_value) AS FLOAT) as stdev_value, + min(count_value) as min_value, + max(count_value) as max_value, + count_big(*) as total + FROM #rawData_1806 + group by subject_id, gender_concept_id +), +statsView (stratum1_id, stratum2_id, count_value, total, rn) as +( + select subject_id as stratum1_id, gender_concept_id as stratum2_id, count_value, count_big(*) as total, row_number() over (partition by subject_id, gender_concept_id order by count_value) as rn + FROM #rawData_1806 + group by subject_id, gender_concept_id, count_value +), +priorStats (stratum1_id, stratum2_id, count_value, total, accumulated) as +( + 
select s.stratum1_id, s.stratum2_id, s.count_value, s.total, sum(p.total) as accumulated + from statsView s + join statsView p on s.stratum1_id = p.stratum1_id and s.stratum2_id = p.stratum2_id and p.rn <= s.rn + group by s.stratum1_id, s.stratum2_id, s.count_value, s.total, s.rn +) +select 1806 as analysis_id, + CAST(o.stratum1_id AS VARCHAR(255)) AS stratum1_id, + CAST(o.stratum2_id AS VARCHAR(255)) AS stratum2_id, + o.total as count_value, + o.min_value, + o.max_value, + o.avg_value, + o.stdev_value, + MIN(case when p.accumulated >= .50 * o.total then count_value else o.max_value end) as median_value, + MIN(case when p.accumulated >= .10 * o.total then count_value else o.max_value end) as p10_value, + MIN(case when p.accumulated >= .25 * o.total then count_value else o.max_value end) as p25_value, + MIN(case when p.accumulated >= .75 * o.total then count_value else o.max_value end) as p75_value, + MIN(case when p.accumulated >= .90 * o.total then count_value else o.max_value end) as p90_value +into #tempResults +from priorStats p +join overallStats o on p.stratum1_id = o.stratum1_id and p.stratum2_id = o.stratum2_id +GROUP BY o.stratum1_id, o.stratum2_id, o.total, o.min_value, o.max_value, o.avg_value, o.stdev_value +; + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select analysis_id, stratum1_id as stratum_1, stratum2_id as stratum_2, +null as stratum_3, null as stratum_4, null as stratum_5, +count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_dist_1806 +from #tempResults +; + +truncate table #rawData_1806; +drop table #rawData_1806; + +truncate table #tempResults; +drop table #tempResults; diff --git a/inst/sql/sql_server/analyses/1807.sql b/inst/sql/sql_server/analyses/1807.sql new file mode 100755 index 00000000..3d5b6002 --- /dev/null +++ b/inst/sql/sql_server/analyses/1807.sql @@ -0,0 +1,12 @@ +-- 1807 Number of measurement occurrence records, by measurement_concept_id and unit_concept_id + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select 1807 as analysis_id, + CAST(m.measurement_concept_id AS VARCHAR(255)) as stratum_1, + CAST(m.unit_concept_id AS VARCHAR(255)) as stratum_2, + null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(m.PERSON_ID) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_1807 +from @cdmDatabaseSchema.measurement m +group by m.measurement_concept_id, m.unit_concept_id +; diff --git a/inst/sql/sql_server/analyses/1809.sql b/inst/sql/sql_server/analyses/1809.sql new file mode 100755 index 00000000..486657db --- /dev/null +++ b/inst/sql/sql_server/analyses/1809.sql @@ -0,0 +1,11 @@ +-- 1809 Number of measurement records with invalid person_id + + +select 1809 as analysis_id, + null as stratum_1, null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(m.PERSON_ID) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_1809 +from @cdmDatabaseSchema.measurement m + left join @cdmDatabaseSchema.PERSON p1 on p1.person_id = m.person_id +where p1.person_id is null +; diff --git a/inst/sql/sql_server/analyses/1810.sql b/inst/sql/sql_server/analyses/1810.sql new file mode 100755 index 00000000..3a4b6fd0 --- /dev/null +++ b/inst/sql/sql_server/analyses/1810.sql @@ -0,0 +1,13 @@ +-- 1810 Number of measurement records outside valid observation period + + +select 1810 as analysis_id, + null as stratum_1, null as stratum_2, null as stratum_3, null as stratum_4, null as 
stratum_5, + COUNT_BIG(m.PERSON_ID) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_1810 +from @cdmDatabaseSchema.measurement m + left join @cdmDatabaseSchema.observation_period op on op.person_id = m.person_id + and m.measurement_date >= op.observation_period_start_date + and m.measurement_date <= op.observation_period_end_date +where op.person_id is null +; diff --git a/inst/sql/sql_server/analyses/1812.sql b/inst/sql/sql_server/analyses/1812.sql new file mode 100755 index 00000000..b79d798e --- /dev/null +++ b/inst/sql/sql_server/analyses/1812.sql @@ -0,0 +1,12 @@ +-- 1812 Number of measurement records with invalid provider_id + + +select 1812 as analysis_id, + null as stratum_1, null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(m.PERSON_ID) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_1812 +from @cdmDatabaseSchema.measurement m + left join @cdmDatabaseSchema.provider p on p.provider_id = m.provider_id +where m.provider_id is not null + and p.provider_id is null +; diff --git a/inst/sql/sql_server/analyses/1813.sql b/inst/sql/sql_server/analyses/1813.sql new file mode 100755 index 00000000..a627b067 --- /dev/null +++ b/inst/sql/sql_server/analyses/1813.sql @@ -0,0 +1,12 @@ +-- 1813 Number of measurement records with invalid visit_id + + +select 1813 as analysis_id, +null as stratum_1, null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, +COUNT_BIG(m.PERSON_ID) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_1813 +from @cdmDatabaseSchema.measurement m + left join @cdmDatabaseSchema.visit_occurrence vo on m.visit_occurrence_id = vo.visit_occurrence_id +where m.visit_occurrence_id is not null + and vo.visit_occurrence_id is null +; diff --git a/inst/sql/sql_server/analyses/1814.sql b/inst/sql/sql_server/analyses/1814.sql new file mode 100755 index 00000000..6fc03ba8 --- /dev/null +++ b/inst/sql/sql_server/analyses/1814.sql @@ -0,0 +1,12 @@ +-- 1814 Number of measurement records with no value (numeric or concept) + + +select 1814 as analysis_id, + null as stratum_1, null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(m.PERSON_ID) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_1814 +from + @cdmDatabaseSchema.measurement m +where m.value_as_number is null + and m.value_as_concept_id is null +; diff --git a/inst/sql/sql_server/analyses/1815.sql b/inst/sql/sql_server/analyses/1815.sql new file mode 100755 index 00000000..bbcf4d82 --- /dev/null +++ b/inst/sql/sql_server/analyses/1815.sql @@ -0,0 +1,75 @@ +-- 1815 Distribution of numeric values, by measurement_concept_id and unit_concept_id + +--HINT DISTRIBUTE_ON_KEY(stratum1_id) +select subject_id as stratum1_id, unit_concept_id as stratum2_id, count_value, count_big(*) as total, row_number() over (partition by subject_id, unit_concept_id order by count_value) as rn +into #statsView +FROM +( + select measurement_concept_id as subject_id, + unit_concept_id, + CAST(value_as_number AS FLOAT) as count_value + from @cdmDatabaseSchema.measurement m + where m.unit_concept_id is not null + and m.value_as_number is not null +) A +group by subject_id, unit_concept_id, count_value +; + +--HINT DISTRIBUTE_ON_KEY(stratum1_id) +select 1815 as analysis_id, + CAST(o.stratum1_id AS VARCHAR(255)) AS stratum1_id, + CAST(o.stratum2_id AS VARCHAR(255)) AS stratum2_id, + o.total as count_value, + o.min_value, + o.max_value, + o.avg_value, + o.stdev_value, 
MIN(case when p.accumulated >= .50 * o.total then count_value else o.max_value end) as median_value, + MIN(case when p.accumulated >= .10 * o.total then count_value else o.max_value end) as p10_value, + MIN(case when p.accumulated >= .25 * o.total then count_value else o.max_value end) as p25_value, + MIN(case when p.accumulated >= .75 * o.total then count_value else o.max_value end) as p75_value, + MIN(case when p.accumulated >= .90 * o.total then count_value else o.max_value end) as p90_value +into #tempResults +from +( + select s.stratum1_id, s.stratum2_id, s.count_value, s.total, sum(p.total) as accumulated + from #statsView s + join #statsView p on s.stratum1_id = p.stratum1_id and s.stratum2_id = p.stratum2_id and p.rn <= s.rn + group by s.stratum1_id, s.stratum2_id, s.count_value, s.total, s.rn +) p +join +( + select subject_id as stratum1_id, + unit_concept_id as stratum2_id, + CAST(avg(1.0 * count_value) AS FLOAT) as avg_value, + CAST(stdev(count_value) AS FLOAT) as stdev_value, + min(count_value) as min_value, + max(count_value) as max_value, + count_big(*) as total + FROM + ( + select measurement_concept_id as subject_id, + unit_concept_id, + CAST(value_as_number AS FLOAT) as count_value + from @cdmDatabaseSchema.measurement m + where m.unit_concept_id is not null + and m.value_as_number is not null + ) A + group by subject_id, unit_concept_id +) o on p.stratum1_id = o.stratum1_id and p.stratum2_id = o.stratum2_id +GROUP BY o.stratum1_id, o.stratum2_id, o.total, o.min_value, o.max_value, o.avg_value, o.stdev_value +; + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select analysis_id, stratum1_id as stratum_1, stratum2_id as stratum_2, +null as stratum_3, null as stratum_4, null as stratum_5, +count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_dist_1815 +from #tempResults +; + +truncate table #statsView; +drop table #statsView; + +truncate table #tempResults; +drop table #tempResults; diff --git a/inst/sql/sql_server/analyses/1816.sql b/inst/sql/sql_server/analyses/1816.sql new file mode 100755 index 00000000..ac060070 --- /dev/null +++ b/inst/sql/sql_server/analyses/1816.sql @@ -0,0 +1,90 @@ +-- 1816 Distribution of low range, by measurement_concept_id and unit_concept_id + + +--HINT DISTRIBUTE_ON_KEY(stratum1_id) +select subject_id as stratum1_id, + unit_concept_id as stratum2_id, + CAST(avg(1.0 * count_value) AS FLOAT) as avg_value, + CAST(stdev(count_value) AS FLOAT) as stdev_value, + min(count_value) as min_value, + max(count_value) as max_value, + count_big(*) as total +into #overallStats +FROM +( + select measurement_concept_id as subject_id, + unit_concept_id, + CAST(range_low AS FLOAT) as count_value + from @cdmDatabaseSchema.measurement m + where m.unit_concept_id is not null + and m.value_as_number is not null + and m.range_low is not null + and m.range_high is not null +) A +group by subject_id, unit_concept_id +; + +--HINT DISTRIBUTE_ON_KEY(stratum1_id) +select + subject_id as stratum1_id, + unit_concept_id as stratum2_id, + count_value, count_big(*) as total, + row_number() over (partition by subject_id, unit_concept_id order by count_value) as rn +into #statsView +FROM +( + select measurement_concept_id as subject_id, + unit_concept_id, + CAST(range_low AS FLOAT) as count_value + from @cdmDatabaseSchema.measurement m + where m.unit_concept_id is not null + and m.value_as_number is not null + and m.range_low is not null + and m.range_high is not null 
+) A +group by subject_id, unit_concept_id, count_value +; + +--HINT DISTRIBUTE_ON_KEY(stratum1_id) +select 1816 as analysis_id, + CAST(o.stratum1_id AS VARCHAR(255)) AS stratum1_id, + CAST(o.stratum2_id AS VARCHAR(255)) AS stratum2_id, + o.total as count_value, + o.min_value, + o.max_value, + o.avg_value, + o.stdev_value, + MIN(case when p.accumulated >= .50 * o.total then count_value else o.max_value end) as median_value, + MIN(case when p.accumulated >= .10 * o.total then count_value else o.max_value end) as p10_value, + MIN(case when p.accumulated >= .25 * o.total then count_value else o.max_value end) as p25_value, + MIN(case when p.accumulated >= .75 * o.total then count_value else o.max_value end) as p75_value, + MIN(case when p.accumulated >= .90 * o.total then count_value else o.max_value end) as p90_value +into #tempResults +from +( + select s.stratum1_id, s.stratum2_id, s.count_value, s.total, sum(p.total) as accumulated + from #statsView s + join #statsView p on s.stratum1_id = p.stratum1_id and s.stratum2_id = p.stratum2_id and p.rn <= s.rn + group by s.stratum1_id, s.stratum2_id, s.count_value, s.total, s.rn +) p +join #overallStats o on p.stratum1_id = o.stratum1_id and p.stratum2_id = o.stratum2_id +GROUP BY o.stratum1_id, o.stratum2_id, o.total, o.min_value, o.max_value, o.avg_value, o.stdev_value +; + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select + analysis_id, stratum1_id as stratum_1, stratum2_id as stratum_2, + null as stratum_3, null as stratum_4, null as stratum_5, + count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_dist_1816 +from #tempResults +; + +truncate table #overallStats; +drop table #overallStats; + +truncate table #statsView; +drop table #statsView; + +truncate table #tempResults; +drop table #tempResults; diff --git a/inst/sql/sql_server/analyses/1817.sql b/inst/sql/sql_server/analyses/1817.sql new file mode 100755 index 00000000..0f82690d --- /dev/null +++ b/inst/sql/sql_server/analyses/1817.sql @@ -0,0 +1,90 @@ +-- 1817 Distribution of high range, by measurement_concept_id and unit_concept_id + + +--HINT DISTRIBUTE_ON_KEY(stratum1_id) +select subject_id as stratum1_id, + unit_concept_id as stratum2_id, + CAST(avg(1.0 * count_value) AS FLOAT) as avg_value, + CAST(stdev(count_value) AS FLOAT) as stdev_value, + min(count_value) as min_value, + max(count_value) as max_value, + count_big(*) as total +into #overallStats +FROM +( + select measurement_concept_id as subject_id, + unit_concept_id, + CAST(range_high AS FLOAT) as count_value + from @cdmDatabaseSchema.measurement m + where m.unit_concept_id is not null + and m.value_as_number is not null + and m.range_low is not null + and m.range_high is not null +) A +group by subject_id, unit_concept_id +; + +--HINT DISTRIBUTE_ON_KEY(stratum1_id) +select + subject_id as stratum1_id, + unit_concept_id as stratum2_id, + count_value, count_big(*) as total, + row_number() over (partition by subject_id, unit_concept_id order by count_value) as rn +into #statsView +FROM +( + select measurement_concept_id as subject_id, + unit_concept_id, + CAST(range_high AS FLOAT) as count_value + from @cdmDatabaseSchema.measurement m + where m.unit_concept_id is not null + and m.value_as_number is not null + and m.range_low is not null + and m.range_high is not null +) A +group by subject_id, unit_concept_id, count_value +; + +--HINT DISTRIBUTE_ON_KEY(stratum1_id) +select 1817 as analysis_id, 
CAST(o.stratum1_id AS VARCHAR(255)) AS stratum1_id, + CAST(o.stratum2_id AS VARCHAR(255)) AS stratum2_id, + o.total as count_value, + o.min_value, + o.max_value, + o.avg_value, + o.stdev_value, + MIN(case when p.accumulated >= .50 * o.total then count_value else o.max_value end) as median_value, + MIN(case when p.accumulated >= .10 * o.total then count_value else o.max_value end) as p10_value, + MIN(case when p.accumulated >= .25 * o.total then count_value else o.max_value end) as p25_value, + MIN(case when p.accumulated >= .75 * o.total then count_value else o.max_value end) as p75_value, + MIN(case when p.accumulated >= .90 * o.total then count_value else o.max_value end) as p90_value +into #tempResults +from +( + select s.stratum1_id, s.stratum2_id, s.count_value, s.total, sum(p.total) as accumulated + from #statsView s + join #statsView p on s.stratum1_id = p.stratum1_id and s.stratum2_id = p.stratum2_id and p.rn <= s.rn + group by s.stratum1_id, s.stratum2_id, s.count_value, s.total, s.rn +) p +join #overallStats o on p.stratum1_id = o.stratum1_id and p.stratum2_id = o.stratum2_id +GROUP BY o.stratum1_id, o.stratum2_id, o.total, o.min_value, o.max_value, o.avg_value, o.stdev_value +; + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select + analysis_id, stratum1_id as stratum_1, stratum2_id as stratum_2, + null as stratum_3, null as stratum_4, null as stratum_5, + count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_dist_1817 +from #tempResults +; + +truncate table #overallStats; +drop table #overallStats; + +truncate table #statsView; +drop table #statsView; + +truncate table #tempResults; +drop table #tempResults; diff --git a/inst/sql/sql_server/analyses/1818.sql b/inst/sql/sql_server/analyses/1818.sql new file mode 100755 index 00000000..e4f32bdf --- /dev/null +++ b/inst/sql/sql_server/analyses/1818.sql @@ -0,0 +1,37 @@ +-- 1818 Number of measurement records below/within/above normal range, by measurement_concept_id and unit_concept_id + + +--HINT DISTRIBUTE_ON_KEY(person_id) +select + person_id, + measurement_concept_id, + unit_concept_id, + CAST(case when value_as_number < range_low then 'Below Range Low' + when value_as_number >= range_low and value_as_number <= range_high then 'Within Range' + when value_as_number > range_high then 'Above Range High' + else 'Other' end AS VARCHAR(255)) as stratum_3 + into #rawData_1818 + from @cdmDatabaseSchema.measurement + where value_as_number is not null + and unit_concept_id is not null + and range_low is not null + and range_high is not null; + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select 1818 as analysis_id, + CAST(measurement_concept_id AS VARCHAR(255)) as stratum_1, + CAST(unit_concept_id AS VARCHAR(255)) as stratum_2, + CAST(stratum_3 AS VARCHAR(255)) as stratum_3, + null as stratum_4, + null as stratum_5, + COUNT_BIG(PERSON_ID) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_1818 +from #rawData_1818 +group by measurement_concept_id, + unit_concept_id, + stratum_3 +; + +truncate table #rawData_1818; +drop table #rawData_1818; + diff --git a/inst/sql/sql_server/analyses/1820.sql b/inst/sql/sql_server/analyses/1820.sql new file mode 100755 index 00000000..71e2047f --- /dev/null +++ b/inst/sql/sql_server/analyses/1820.sql @@ -0,0 +1,11 @@ +-- 1820 Number of measurement records by measurement start month + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select 1820 as analysis_id, 
CAST(YEAR(measurement_date)*100 + month(measurement_date) AS VARCHAR(255)) as stratum_1, + null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(PERSON_ID) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_1820 +from @cdmDatabaseSchema.measurement m +group by YEAR(measurement_date)*100 + month(measurement_date) +; diff --git a/inst/sql/sql_server/analyses/1821.sql b/inst/sql/sql_server/analyses/1821.sql new file mode 100755 index 00000000..77a9ec01 --- /dev/null +++ b/inst/sql/sql_server/analyses/1821.sql @@ -0,0 +1,11 @@ +-- 1821 Number of measurement records with no numeric value + + +select 1821 as analysis_id, + null as stratum_1, null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(m.PERSON_ID) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_1821 +from + @cdmDatabaseSchema.measurement m +where m.value_as_number is null +; diff --git a/inst/sql/sql_server/analyses/1891.sql b/inst/sql/sql_server/analyses/1891.sql new file mode 100755 index 00000000..50627089 --- /dev/null +++ b/inst/sql/sql_server/analyses/1891.sql @@ -0,0 +1,22 @@ +-- 1891 Number of total persons that have at least x measurements + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select 1891 as analysis_id, + CAST(measurement_concept_id as varchar(255)) as stratum_1, + CAST(meas_cnt as varchar(255)) as stratum_2, + null as stratum_3, + null as stratum_4, + null as stratum_5, + sum(count(person_id)) over (partition by measurement_concept_id order by meas_cnt desc) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_1891 +from +( + select + m.measurement_concept_id, + count(m.measurement_id) as meas_cnt, + m.person_id + from @cdmDatabaseSchema.measurement m + group by m.person_id, m.measurement_concept_id +) cnt_q +group by cnt_q.measurement_concept_id, cnt_q.meas_cnt +; diff --git a/inst/sql/sql_server/analyses/1900.sql b/inst/sql/sql_server/analyses/1900.sql new file mode 100755 index 00000000..81113731 --- /dev/null +++ b/inst/sql/sql_server/analyses/1900.sql @@ -0,0 +1,19 @@ +-- 1900 concept_0 report + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select 1900 as analysis_id, CAST(table_name AS VARCHAR(255)) as stratum_1, source_value as stratum_2, +null as stratum_3, null as stratum_4, null as stratum_5, +cnt as count_value + into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_1900 + from ( +select 'measurement' as table_name,measurement_source_value as source_value, COUNT_BIG(*) as cnt from @cdmDatabaseSchema.measurement where measurement_concept_id = 0 group by measurement_source_value +union +select 'procedure_occurrence' as table_name,procedure_source_value as source_value, COUNT_BIG(*) as cnt from @cdmDatabaseSchema.procedure_occurrence where procedure_concept_id = 0 group by procedure_source_value +union +select 'drug_exposure' as table_name,drug_source_value as source_value, COUNT_BIG(*) as cnt from @cdmDatabaseSchema.drug_exposure where drug_concept_id = 0 group by drug_source_value +union +select 'condition_occurrence' as table_name,condition_source_value as source_value, COUNT_BIG(*) as cnt from @cdmDatabaseSchema.condition_occurrence where condition_concept_id = 0 group by condition_source_value +) a +where cnt >= 1 --use other threshold if needed (e.g., 10) +--order by a.table_name desc, cnt desc +; diff --git a/inst/sql/sql_server/analyses/2.sql b/inst/sql/sql_server/analyses/2.sql new file mode 100755 index 00000000..acf99e26 --- /dev/null +++ 
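Note on analysis 1891: it turns per-person record counts into an "at least N" curve. The inner query counts measurements per person and concept, and the descending cumulative window sum rolls "exactly N records" up into "N or more records". The same idea on a hypothetical pre-aggregated table:

-- #personCounts(concept_id, person_id, rec_cnt) is a hypothetical per-person count table
select concept_id,
  rec_cnt as at_least_n,
  -- cumulative sum in descending rec_cnt order = persons with rec_cnt >= this value
  sum(count(person_id)) over (partition by concept_id order by rec_cnt desc) as persons_with_at_least_n
from #personCounts
group by concept_id, rec_cnt;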
b/inst/sql/sql_server/analyses/2.sql @@ -0,0 +1,10 @@ +-- 2 Number of persons by gender + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select 2 as analysis_id, +CAST(gender_concept_id AS VARCHAR(255)) as stratum_1, +null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, +COUNT_BIG(distinct person_id) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_2 +from @cdmDatabaseSchema.PERSON +group by GENDER_CONCEPT_ID; diff --git a/inst/sql/sql_server/analyses/200.sql b/inst/sql/sql_server/analyses/200.sql new file mode 100755 index 00000000..39899088 --- /dev/null +++ b/inst/sql/sql_server/analyses/200.sql @@ -0,0 +1,12 @@ +-- 200 Number of persons with at least one visit occurrence, by visit_concept_id + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select 200 as analysis_id, + CAST(vo1.visit_concept_id AS VARCHAR(255)) as stratum_1, + null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(distinct vo1.PERSON_ID) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_200 +from + @cdmDatabaseSchema.visit_occurrence vo1 +group by vo1.visit_concept_id +; diff --git a/inst/sql/sql_server/analyses/2000.sql b/inst/sql/sql_server/analyses/2000.sql new file mode 100755 index 00000000..3a05c5c9 --- /dev/null +++ b/inst/sql/sql_server/analyses/2000.sql @@ -0,0 +1,16 @@ +-- 2000 patients with at least 1 Dx and 1 Rx + + +select 2000 as analysis_id, + null as stratum_1, null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, +--gender_concept_id as stratum_1, COUNT_BIG(distinct person_id) as count_value + CAST(a.cnt AS BIGINT) AS count_value + into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_2000 + FROM ( + select COUNT_BIG(*) cnt from ( + select distinct person_id from @cdmDatabaseSchema.condition_occurrence + intersect + select distinct person_id from @cdmDatabaseSchema.drug_exposure + ) b + ) a + ; diff --git a/inst/sql/sql_server/analyses/2001.sql b/inst/sql/sql_server/analyses/2001.sql new file mode 100755 index 00000000..0c1c6be1 --- /dev/null +++ b/inst/sql/sql_server/analyses/2001.sql @@ -0,0 +1,16 @@ +-- 2001 patients with at least 1 Dx and 1 Proc + + +select 2001 as analysis_id, + null as stratum_1, null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, +--gender_concept_id as stratum_1, COUNT_BIG(distinct person_id) as count_value + CAST(a.cnt AS BIGINT) AS count_value + into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_2001 + FROM ( + select COUNT_BIG(*) cnt from ( + select distinct person_id from @cdmDatabaseSchema.condition_occurrence + intersect + select distinct person_id from @cdmDatabaseSchema.procedure_occurrence + ) b + ) a + ; diff --git a/inst/sql/sql_server/analyses/2002.sql b/inst/sql/sql_server/analyses/2002.sql new file mode 100755 index 00000000..2cd39463 --- /dev/null +++ b/inst/sql/sql_server/analyses/2002.sql @@ -0,0 +1,18 @@ +-- 2002 patients with at least 1 Mes and 1 Dx and 1 Rx + + +select 2002 as analysis_id, + null as stratum_1, null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, +--gender_concept_id as stratum_1, COUNT_BIG(distinct person_id) as count_value + CAST(a.cnt AS BIGINT) AS count_value + into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_2002 + FROM ( + select COUNT_BIG(*) cnt from ( + select distinct person_id from @cdmDatabaseSchema.measurement + intersect + select distinct person_id from @cdmDatabaseSchema.condition_occurrence + intersect + select distinct person_id from 
@cdmDatabaseSchema.drug_exposure + ) b + ) a + ; diff --git a/inst/sql/sql_server/analyses/2003.sql b/inst/sql/sql_server/analyses/2003.sql new file mode 100755 index 00000000..255745b5 --- /dev/null +++ b/inst/sql/sql_server/analyses/2003.sql @@ -0,0 +1,10 @@ +-- 2003 Patients with at least one visit +-- this analysis is in fact redundant, since it is possible to get it via +-- dist analysis 203 and query select count_value from achilles_results_dist where analysis_id = 203; + + +select 2003 as analysis_id, +null as stratum_1, null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, +COUNT_BIG(distinct person_id) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_2003 +from @cdmDatabaseSchema.visit_occurrence; diff --git a/inst/sql/sql_server/analyses/201.sql b/inst/sql/sql_server/analyses/201.sql new file mode 100755 index 00000000..7db88920 --- /dev/null +++ b/inst/sql/sql_server/analyses/201.sql @@ -0,0 +1,12 @@ +-- 201 Number of visit occurrence records, by visit_concept_id + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select 201 as analysis_id, + CAST(vo1.visit_concept_id AS VARCHAR(255)) as stratum_1, + null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(vo1.PERSON_ID) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_201 +from + @cdmDatabaseSchema.visit_occurrence vo1 +group by vo1.visit_concept_id +; diff --git a/inst/sql/sql_server/analyses/202.sql b/inst/sql/sql_server/analyses/202.sql new file mode 100755 index 00000000..e1bb2faf --- /dev/null +++ b/inst/sql/sql_server/analyses/202.sql @@ -0,0 +1,14 @@ +-- 202 Number of persons by visit occurrence start month, by visit_concept_id + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select 202 as analysis_id, + CAST(vo1.visit_concept_id AS VARCHAR(255)) as stratum_1, + CAST(YEAR(visit_start_date)*100 + month(visit_start_date) AS VARCHAR(255)) as stratum_2, + null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(distinct PERSON_ID) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_202 +from +@cdmDatabaseSchema.visit_occurrence vo1 +group by vo1.visit_concept_id, + YEAR(visit_start_date)*100 + month(visit_start_date) +; diff --git a/inst/sql/sql_server/analyses/203.sql b/inst/sql/sql_server/analyses/203.sql new file mode 100755 index 00000000..79b76848 --- /dev/null +++ b/inst/sql/sql_server/analyses/203.sql @@ -0,0 +1,60 @@ +-- 203 Number of distinct visit occurrence concepts per person + +--HINT DISTRIBUTE_ON_KEY(count_value) +with rawData(person_id, count_value) as +( + select vo1.person_id, COUNT_BIG(distinct vo1.visit_concept_id) as count_value + from @cdmDatabaseSchema.visit_occurrence vo1 + group by vo1.person_id +), +overallStats (avg_value, stdev_value, min_value, max_value, total) as +( + select CAST(avg(1.0 * count_value) AS FLOAT) as avg_value, + CAST(stdev(count_value) AS FLOAT) as stdev_value, + min(count_value) as min_value, + max(count_value) as max_value, + count_big(*) as total + from rawData +), +statsView (count_value, total, rn) as +( + select count_value, + count_big(*) as total, + row_number() over (order by count_value) as rn + FROM rawData + group by count_value +), +priorStats (count_value, total, accumulated) as +( + select s.count_value, s.total, sum(p.total) as accumulated + from statsView s + join statsView p on p.rn <= s.rn + group by s.count_value, s.total, s.rn +) +select 203 as analysis_id, + o.total as count_value, + o.min_value, + o.max_value, + o.avg_value, + 
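Note on analyses 2000-2002: they measure cross-domain overlap by intersecting the distinct person_id sets and counting the result; INTERSECT already de-duplicates, so COUNT_BIG(*) is a person count. For reference only (not part of this changeset), the same overlap can be expressed with EXISTS, which can be cheaper on some platforms:

-- Illustration only: persons with at least one condition and at least one drug exposure
select count_big(*) as count_value
from (select distinct person_id from @cdmDatabaseSchema.condition_occurrence) dx
where exists (
  select 1
  from @cdmDatabaseSchema.drug_exposure rx
  where rx.person_id = dx.person_id
);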
o.stdev_value, + MIN(case when p.accumulated >= .50 * o.total then count_value else o.max_value end) as median_value, + MIN(case when p.accumulated >= .10 * o.total then count_value else o.max_value end) as p10_value, + MIN(case when p.accumulated >= .25 * o.total then count_value else o.max_value end) as p25_value, + MIN(case when p.accumulated >= .75 * o.total then count_value else o.max_value end) as p75_value, + MIN(case when p.accumulated >= .90 * o.total then count_value else o.max_value end) as p90_value +INTO #tempResults +from priorStats p +CROSS JOIN overallStats o +GROUP BY o.total, o.min_value, o.max_value, o.avg_value, o.stdev_value +; + +--HINT DISTRIBUTE_ON_KEY(count_value) +select analysis_id, +null as stratum_1, null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, +count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_dist_203 +FROM #tempResults +; + +truncate table #tempResults; +drop table #tempResults; diff --git a/inst/sql/sql_server/analyses/204.sql b/inst/sql/sql_server/analyses/204.sql new file mode 100755 index 00000000..1eb6894f --- /dev/null +++ b/inst/sql/sql_server/analyses/204.sql @@ -0,0 +1,20 @@ +-- 204 Number of persons with at least one visit occurrence, by visit_concept_id by calendar year by gender by age decile + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select 204 as analysis_id, + CAST(vo1.visit_concept_id AS VARCHAR(255)) as stratum_1, + CAST(YEAR(visit_start_date) AS VARCHAR(255)) as stratum_2, + CAST(p1.gender_concept_id AS VARCHAR(255)) as stratum_3, + CAST(floor((year(visit_start_date) - p1.year_of_birth)/10) AS VARCHAR(255)) as stratum_4, + null as stratum_5, + COUNT_BIG(distinct p1.PERSON_ID) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_204 +from @cdmDatabaseSchema.PERSON p1 +inner join +@cdmDatabaseSchema.visit_occurrence vo1 +on p1.person_id = vo1.person_id +group by vo1.visit_concept_id, + YEAR(visit_start_date), + p1.gender_concept_id, + floor((year(visit_start_date) - p1.year_of_birth)/10) +; diff --git a/inst/sql/sql_server/analyses/206.sql b/inst/sql/sql_server/analyses/206.sql new file mode 100755 index 00000000..5b425dc7 --- /dev/null +++ b/inst/sql/sql_server/analyses/206.sql @@ -0,0 +1,70 @@ +-- 206 Distribution of age by visit_concept_id + +--HINT DISTRIBUTE_ON_KEY(stratum1_id) +with rawData(stratum1_id, stratum2_id, count_value) as +( + select vo1.visit_concept_id, + p1.gender_concept_id, + vo1.visit_start_year - p1.year_of_birth as count_value + from @cdmDatabaseSchema.PERSON p1 + inner join + ( + select person_id, visit_concept_id, min(year(visit_start_date)) as visit_start_year + from @cdmDatabaseSchema.visit_occurrence + group by person_id, visit_concept_id + ) vo1 on p1.person_id = vo1.person_id +), +overallStats (stratum1_id, stratum2_id, avg_value, stdev_value, min_value, max_value, total) as +( + select stratum1_id, + stratum2_id, + CAST(avg(1.0 * count_value) AS FLOAT) as avg_value, + CAST(stdev(count_value) AS FLOAT) as stdev_value, + min(count_value) as min_value, + max(count_value) as max_value, + count_big(*) as total + FROM rawData + group by stratum1_id, stratum2_id +), +statsView (stratum1_id, stratum2_id, count_value, total, rn) as +( + select stratum1_id, stratum2_id, count_value, count_big(*) as total, row_number() over (partition by stratum1_id, stratum2_id order by count_value) as rn + FROM rawData + group by stratum1_id, stratum2_id, 
count_value +), +priorStats (stratum1_id, stratum2_id, count_value, total, accumulated) as +( + select s.stratum1_id, s.stratum2_id, s.count_value, s.total, sum(p.total) as accumulated + from statsView s + join statsView p on s.stratum1_id = p.stratum1_id and s.stratum2_id = p.stratum2_id and p.rn <= s.rn + group by s.stratum1_id, s.stratum2_id, s.count_value, s.total, s.rn +) +select 206 as analysis_id, + CAST(o.stratum1_id AS VARCHAR(255)) AS stratum1_id, + CAST(o.stratum2_id AS VARCHAR(255)) AS stratum2_id, + o.total as count_value, + o.min_value, + o.max_value, + o.avg_value, + o.stdev_value, + MIN(case when p.accumulated >= .50 * o.total then count_value else o.max_value end) as median_value, + MIN(case when p.accumulated >= .10 * o.total then count_value else o.max_value end) as p10_value, + MIN(case when p.accumulated >= .25 * o.total then count_value else o.max_value end) as p25_value, + MIN(case when p.accumulated >= .75 * o.total then count_value else o.max_value end) as p75_value, + MIN(case when p.accumulated >= .90 * o.total then count_value else o.max_value end) as p90_value +into #tempResults +from priorStats p +join overallStats o on p.stratum1_id = o.stratum1_id and p.stratum2_id = o.stratum2_id +GROUP BY o.stratum1_id, o.stratum2_id, o.total, o.min_value, o.max_value, o.avg_value, o.stdev_value +; + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select analysis_id, stratum1_id as stratum_1, stratum2_id as stratum_2, +null as stratum_3, null as stratum_4, null as stratum_5, +count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_dist_206 +from #tempResults +; + +truncate table #tempResults; +drop table #tempResults; diff --git a/inst/sql/sql_server/analyses/207.sql b/inst/sql/sql_server/analyses/207.sql new file mode 100755 index 00000000..012c8b35 --- /dev/null +++ b/inst/sql/sql_server/analyses/207.sql @@ -0,0 +1,13 @@ +--207 Number of visit records with invalid person_id + + +select 207 as analysis_id, + null as stratum_1, null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(vo1.PERSON_ID) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_207 +from + @cdmDatabaseSchema.visit_occurrence vo1 + left join @cdmDatabaseSchema.PERSON p1 + on p1.person_id = vo1.person_id +where p1.person_id is null +; diff --git a/inst/sql/sql_server/analyses/208.sql b/inst/sql/sql_server/analyses/208.sql new file mode 100755 index 00000000..591a72fd --- /dev/null +++ b/inst/sql/sql_server/analyses/208.sql @@ -0,0 +1,15 @@ +--208 Number of visit records outside valid observation period + + +select 208 as analysis_id, + null as stratum_1, null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(vo1.PERSON_ID) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_208 +from + @cdmDatabaseSchema.visit_occurrence vo1 + left join @cdmDatabaseSchema.observation_period op1 + on op1.person_id = vo1.person_id + and vo1.visit_start_date >= op1.observation_period_start_date + and vo1.visit_start_date <= op1.observation_period_end_date +where op1.person_id is null +; diff --git a/inst/sql/sql_server/analyses/209.sql b/inst/sql/sql_server/analyses/209.sql new file mode 100755 index 00000000..1e07c3e2 --- /dev/null +++ b/inst/sql/sql_server/analyses/209.sql @@ -0,0 +1,11 @@ +--209 Number of visit records with end date < start date + + +select 209 as analysis_id, + null as stratum_1, null 
as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(vo1.PERSON_ID) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_209 +from + @cdmDatabaseSchema.visit_occurrence vo1 +where visit_end_date < visit_start_date +; diff --git a/inst/sql/sql_server/analyses/210.sql b/inst/sql/sql_server/analyses/210.sql new file mode 100755 index 00000000..2cd26b2a --- /dev/null +++ b/inst/sql/sql_server/analyses/210.sql @@ -0,0 +1,14 @@ +--210 Number of visit records with invalid care_site_id + + +select 210 as analysis_id, + null as stratum_1, null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(vo1.PERSON_ID) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_210 +from + @cdmDatabaseSchema.visit_occurrence vo1 + left join @cdmDatabaseSchema.care_site cs1 + on vo1.care_site_id = cs1.care_site_id +where vo1.care_site_id is not null + and cs1.care_site_id is null +; diff --git a/inst/sql/sql_server/analyses/2100.sql b/inst/sql/sql_server/analyses/2100.sql new file mode 100755 index 00000000..10f142c5 --- /dev/null +++ b/inst/sql/sql_server/analyses/2100.sql @@ -0,0 +1,12 @@ +-- 2100 Number of persons with at least one device exposure , by device_concept_id + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select 2100 as analysis_id, + CAST(m.device_CONCEPT_ID AS VARCHAR(255)) as stratum_1, + null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(distinct m.PERSON_ID) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_2100 +from + @cdmDatabaseSchema.device_exposure m +group by m.device_CONCEPT_ID +; diff --git a/inst/sql/sql_server/analyses/2101.sql b/inst/sql/sql_server/analyses/2101.sql new file mode 100755 index 00000000..0df35185 --- /dev/null +++ b/inst/sql/sql_server/analyses/2101.sql @@ -0,0 +1,12 @@ +-- 2101 Number of device exposure records, by device_concept_id + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select 2101 as analysis_id, + CAST(m.device_CONCEPT_ID AS VARCHAR(255)) as stratum_1, + null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(m.PERSON_ID) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_2101 +from + @cdmDatabaseSchema.device_exposure m +group by m.device_CONCEPT_ID +; diff --git a/inst/sql/sql_server/analyses/2102.sql b/inst/sql/sql_server/analyses/2102.sql new file mode 100755 index 00000000..53930a2e --- /dev/null +++ b/inst/sql/sql_server/analyses/2102.sql @@ -0,0 +1,14 @@ +-- 2102 Number of persons by device by start month, by device_concept_id + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select 2102 as analysis_id, + CAST(m.device_CONCEPT_ID AS VARCHAR(255)) as stratum_1, + CAST(YEAR(device_exposure_start_date)*100 + month(device_exposure_start_date) AS VARCHAR(255)) as stratum_2, + null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(distinct PERSON_ID) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_2102 +from + @cdmDatabaseSchema.device_exposure m +group by m.device_CONCEPT_ID, + YEAR(device_exposure_start_date)*100 + month(device_exposure_start_date) +; diff --git a/inst/sql/sql_server/analyses/2104.sql b/inst/sql/sql_server/analyses/2104.sql new file mode 100755 index 00000000..314bbe60 --- /dev/null +++ b/inst/sql/sql_server/analyses/2104.sql @@ -0,0 +1,18 @@ +-- 2104 Number of persons with at least one device occurrence, by device_concept_id by calendar year by gender by age decile + +--HINT DISTRIBUTE_ON_KEY(stratum_1) 
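Note on the template tokens: @cdmDatabaseSchema, @scratchDatabaseSchema, @schemaDelim and @tempAchillesPrefix in every file are SqlRender-style parameters filled in by the R layer at run time, and the --HINT DISTRIBUTE_ON_KEY(...) comments are read during dialect translation to pick a distribution column on MPP platforms. With hypothetical values (scratch schema 'scratch', delimiter '.', prefix 'tmpach', CDM schema 'cdm'), the 2100 query above would render roughly as:

-- Rendered form of analyses/2100.sql; parameter values here are examples only,
-- and platform-specific translation may change the INTO/temp-table syntax further.
select 2100 as analysis_id,
  CAST(m.device_CONCEPT_ID AS VARCHAR(255)) as stratum_1,
  null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5,
  COUNT_BIG(distinct m.PERSON_ID) as count_value
into scratch.tmpach_2100
from cdm.device_exposure m
group by m.device_CONCEPT_ID;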
+select 2104 as analysis_id, + CAST(m.device_CONCEPT_ID AS VARCHAR(255)) as stratum_1, + CAST(YEAR(device_exposure_start_date) AS VARCHAR(255)) as stratum_2, + CAST(p1.gender_concept_id AS VARCHAR(255)) as stratum_3, + CAST(floor((year(device_exposure_start_date) - p1.year_of_birth)/10) AS VARCHAR(255)) as stratum_4, + null as stratum_5, + COUNT_BIG(distinct p1.PERSON_ID) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_2104 +from @cdmDatabaseSchema.PERSON p1 +inner join @cdmDatabaseSchema.device_exposure m on p1.person_id = m.person_id +group by m.device_CONCEPT_ID, + YEAR(device_exposure_start_date), + p1.gender_concept_id, + floor((year(device_exposure_start_date) - p1.year_of_birth)/10) +; diff --git a/inst/sql/sql_server/analyses/2105.sql b/inst/sql/sql_server/analyses/2105.sql new file mode 100755 index 00000000..e9179435 --- /dev/null +++ b/inst/sql/sql_server/analyses/2105.sql @@ -0,0 +1,13 @@ +-- 2105 Number of exposure records by device_concept_id by device_type_concept_id + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select 2105 as analysis_id, + CAST(m.device_CONCEPT_ID AS VARCHAR(255)) as stratum_1, + CAST(m.device_type_concept_id AS VARCHAR(255)) as stratum_2, + null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(m.PERSON_ID) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_2105 +from @cdmDatabaseSchema.device_exposure m +group by m.device_CONCEPT_ID, + m.device_type_concept_id +; diff --git a/inst/sql/sql_server/analyses/211.sql b/inst/sql/sql_server/analyses/211.sql new file mode 100755 index 00000000..6f7e182c --- /dev/null +++ b/inst/sql/sql_server/analyses/211.sql @@ -0,0 +1,60 @@ +-- 211 Distribution of length of stay by visit_concept_id + +--HINT DISTRIBUTE_ON_KEY(stratum_id) +with rawData(stratum_id, count_value) as +( + select visit_concept_id, datediff(dd,visit_start_date,visit_end_date) as count_value + from @cdmDatabaseSchema.visit_occurrence +), +overallStats (stratum_id, avg_value, stdev_value, min_value, max_value, total) as +( + select stratum_id, + CAST(avg(1.0 * count_value) AS FLOAT) as avg_value, + CAST(stdev(count_value) AS FLOAT) as stdev_value, + min(count_value) as min_value, + max(count_value) as max_value, + count_big(*) as total + FROM rawData + group by stratum_id +), +statsView (stratum_id, count_value, total, rn) as +( + select stratum_id, count_value, count_big(*) as total, row_number() over (order by count_value) as rn + FROM rawData + group by stratum_id, count_value +), +priorStats (stratum_id, count_value, total, accumulated) as +( + select s.stratum_id, s.count_value, s.total, sum(p.total) as accumulated + from statsView s + join statsView p on s.stratum_id = p.stratum_id and p.rn <= s.rn + group by s.stratum_id, s.count_value, s.total, s.rn +) +select 211 as analysis_id, + CAST(o.stratum_id AS VARCHAR(255)) AS stratum_id, + o.total as count_value, + o.min_value, + o.max_value, + o.avg_value, + o.stdev_value, + MIN(case when p.accumulated >= .50 * o.total then count_value else o.max_value end) as median_value, + MIN(case when p.accumulated >= .10 * o.total then count_value else o.max_value end) as p10_value, + MIN(case when p.accumulated >= .25 * o.total then count_value else o.max_value end) as p25_value, + MIN(case when p.accumulated >= .75 * o.total then count_value else o.max_value end) as p75_value, + MIN(case when p.accumulated >= .90 * o.total then count_value else o.max_value end) as p90_value +into #tempResults +from priorStats p +join overallStats o on p.stratum_id = 
o.stratum_id +GROUP BY o.stratum_id, o.total, o.min_value, o.max_value, o.avg_value, o.stdev_value +; + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select analysis_id, stratum_id as stratum_1, +null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, +count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_dist_211 +from #tempResults +; + +truncate table #tempResults; +drop table #tempResults; diff --git a/inst/sql/sql_server/analyses/212.sql b/inst/sql/sql_server/analyses/212.sql new file mode 100755 index 00000000..eeec483d --- /dev/null +++ b/inst/sql/sql_server/analyses/212.sql @@ -0,0 +1,19 @@ +-- 212 Number of persons with at least one visit occurrence by calendar year by gender by age decile + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select 212 as analysis_id, + CAST(YEAR(visit_start_date) AS VARCHAR(255)) as stratum_1, + CAST(p1.gender_concept_id AS VARCHAR(255)) as stratum_2, + CAST(floor((year(visit_start_date) - p1.year_of_birth)/10) AS VARCHAR(255)) as stratum_3, + null as stratum_4, null as stratum_5, + COUNT_BIG(distinct p1.PERSON_ID) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_212 +from @cdmDatabaseSchema.PERSON p1 +inner join +@cdmDatabaseSchema.visit_occurrence vo1 +on p1.person_id = vo1.person_id +group by + YEAR(visit_start_date), + p1.gender_concept_id, + floor((year(visit_start_date) - p1.year_of_birth)/10) +; diff --git a/inst/sql/sql_server/analyses/220.sql b/inst/sql/sql_server/analyses/220.sql new file mode 100755 index 00000000..d9447809 --- /dev/null +++ b/inst/sql/sql_server/analyses/220.sql @@ -0,0 +1,12 @@ +-- 220 Number of visit occurrence records by visit occurrence start month + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select 220 as analysis_id, + CAST(YEAR(visit_start_date)*100 + month(visit_start_date) AS VARCHAR(255)) as stratum_1, + null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(PERSON_ID) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_220 +from +@cdmDatabaseSchema.visit_occurrence vo1 +group by YEAR(visit_start_date)*100 + month(visit_start_date) +; diff --git a/inst/sql/sql_server/analyses/2200.sql b/inst/sql/sql_server/analyses/2200.sql new file mode 100755 index 00000000..b8ed444c --- /dev/null +++ b/inst/sql/sql_server/analyses/2200.sql @@ -0,0 +1,12 @@ +-- 2200 Number of persons with at least one note, by note_type_concept_id + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select 2200 as analysis_id, + CAST(m.note_type_CONCEPT_ID AS VARCHAR(255)) as stratum_1, + null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(distinct m.PERSON_ID) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_2200 +from + @cdmDatabaseSchema.note m +group by m.note_type_CONCEPT_ID +; diff --git a/inst/sql/sql_server/analyses/2201.sql b/inst/sql/sql_server/analyses/2201.sql new file mode 100755 index 00000000..86f3d559 --- /dev/null +++ b/inst/sql/sql_server/analyses/2201.sql @@ -0,0 +1,12 @@ +-- 2201 Number of note records, by note_type_concept_id + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select 2201 as analysis_id, + CAST(m.note_type_CONCEPT_ID AS VARCHAR(255)) as stratum_1, + null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(m.PERSON_ID) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_2201 +from + 
@cdmDatabaseSchema.note m +group by m.note_type_CONCEPT_ID +; diff --git a/inst/sql/sql_server/analyses/221.sql b/inst/sql/sql_server/analyses/221.sql new file mode 100755 index 00000000..87afd657 --- /dev/null +++ b/inst/sql/sql_server/analyses/221.sql @@ -0,0 +1,12 @@ +-- 221 Number of persons by visit start year + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select 221 as analysis_id, + CAST(YEAR(visit_start_date) AS VARCHAR(255)) as stratum_1, + null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(distinct PERSON_ID) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_221 +from +@cdmDatabaseSchema.visit_occurrence vo1 +group by YEAR(visit_start_date) +; diff --git a/inst/sql/sql_server/analyses/3.sql b/inst/sql/sql_server/analyses/3.sql new file mode 100755 index 00000000..f5f7918d --- /dev/null +++ b/inst/sql/sql_server/analyses/3.sql @@ -0,0 +1,9 @@ +-- 3 Number of persons by year of birth + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select 3 as analysis_id, CAST(year_of_birth AS VARCHAR(255)) as stratum_1, +null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, +COUNT_BIG(distinct person_id) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_3 +from @cdmDatabaseSchema.PERSON +group by YEAR_OF_BIRTH; diff --git a/inst/sql/sql_server/analyses/300.sql b/inst/sql/sql_server/analyses/300.sql new file mode 100755 index 00000000..7207481c --- /dev/null +++ b/inst/sql/sql_server/analyses/300.sql @@ -0,0 +1,8 @@ +-- 300 Number of providers + + +select 300 as analysis_id, +null as stratum_1, null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, +COUNT_BIG(distinct provider_id) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_300 +from @cdmDatabaseSchema.provider; diff --git a/inst/sql/sql_server/analyses/301.sql b/inst/sql/sql_server/analyses/301.sql new file mode 100755 index 00000000..274e6f0c --- /dev/null +++ b/inst/sql/sql_server/analyses/301.sql @@ -0,0 +1,10 @@ +-- 301 Number of providers by specialty concept_id + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select 301 as analysis_id, +CAST(specialty_concept_id AS VARCHAR(255)) as stratum_1, +null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, +COUNT_BIG(distinct provider_id) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_301 +from @cdmDatabaseSchema.provider +group by specialty_CONCEPT_ID; diff --git a/inst/sql/sql_server/analyses/302.sql b/inst/sql/sql_server/analyses/302.sql new file mode 100755 index 00000000..f87d6928 --- /dev/null +++ b/inst/sql/sql_server/analyses/302.sql @@ -0,0 +1,13 @@ +-- 302 Number of providers with invalid care site id + + +select 302 as analysis_id, +null as stratum_1, null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, +COUNT_BIG(provider_id) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_302 +from @cdmDatabaseSchema.provider p1 + left join @cdmDatabaseSchema.care_site cs1 + on p1.care_site_id = cs1.care_site_id +where p1.care_site_id is not null + and cs1.care_site_id is null +; diff --git a/inst/sql/sql_server/analyses/4.sql b/inst/sql/sql_server/analyses/4.sql new file mode 100755 index 00000000..a068ae22 --- /dev/null +++ b/inst/sql/sql_server/analyses/4.sql @@ -0,0 +1,9 @@ +-- 4 Number of persons by race + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select 4 as analysis_id, CAST(RACE_CONCEPT_ID AS VARCHAR(255)) as stratum_1, +null as stratum_2, null as stratum_3, null as 
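Note on the output shape: every count analysis writes the same column set (analysis_id, stratum_1 through stratum_5 cast to VARCHAR(255), and count_value) into its own scratch table, which is what lets heterogeneous strata such as concept ids, calendar months and age deciles be unioned into one results table downstream. A sketch of that shared shape (table name and exact types assumed here, not taken from this diff):

-- Assumed shape of the combined results table the per-analysis scratch tables feed into
create table @resultsDatabaseSchema.achilles_results (
  analysis_id int,
  stratum_1 varchar(255),
  stratum_2 varchar(255),
  stratum_3 varchar(255),
  stratum_4 varchar(255),
  stratum_5 varchar(255),
  count_value bigint
);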
stratum_4, null as stratum_5, +COUNT_BIG(distinct person_id) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_4 +from @cdmDatabaseSchema.PERSON +group by RACE_CONCEPT_ID; diff --git a/inst/sql/sql_server/analyses/400.sql b/inst/sql/sql_server/analyses/400.sql new file mode 100755 index 00000000..503730df --- /dev/null +++ b/inst/sql/sql_server/analyses/400.sql @@ -0,0 +1,12 @@ +-- 400 Number of persons with at least one condition occurrence, by condition_concept_id + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select 400 as analysis_id, + CAST(co1.condition_CONCEPT_ID AS VARCHAR(255)) as stratum_1, + null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(distinct co1.PERSON_ID) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_400 +from + @cdmDatabaseSchema.condition_occurrence co1 +group by co1.condition_CONCEPT_ID +; diff --git a/inst/sql/sql_server/analyses/401.sql b/inst/sql/sql_server/analyses/401.sql new file mode 100755 index 00000000..bf5ff26d --- /dev/null +++ b/inst/sql/sql_server/analyses/401.sql @@ -0,0 +1,12 @@ +-- 401 Number of condition occurrence records, by condition_concept_id + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select 401 as analysis_id, + CAST(co1.condition_CONCEPT_ID AS VARCHAR(255)) as stratum_1, + null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(co1.PERSON_ID) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_401 +from + @cdmDatabaseSchema.condition_occurrence co1 +group by co1.condition_CONCEPT_ID +; diff --git a/inst/sql/sql_server/analyses/402.sql b/inst/sql/sql_server/analyses/402.sql new file mode 100755 index 00000000..b5f811e2 --- /dev/null +++ b/inst/sql/sql_server/analyses/402.sql @@ -0,0 +1,14 @@ +-- 402 Number of persons by condition occurrence start month, by condition_concept_id + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select 402 as analysis_id, + CAST(co1.condition_concept_id AS VARCHAR(255)) as stratum_1, + CAST(YEAR(condition_start_date)*100 + month(condition_start_date) AS VARCHAR(255)) as stratum_2, + null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(distinct PERSON_ID) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_402 +from +@cdmDatabaseSchema.condition_occurrence co1 +group by co1.condition_concept_id, + YEAR(condition_start_date)*100 + month(condition_start_date) +; diff --git a/inst/sql/sql_server/analyses/403.sql b/inst/sql/sql_server/analyses/403.sql new file mode 100755 index 00000000..36824892 --- /dev/null +++ b/inst/sql/sql_server/analyses/403.sql @@ -0,0 +1,60 @@ +-- 403 Number of distinct condition occurrence concepts per person + +--HINT DISTRIBUTE_ON_KEY(count_value) +with rawData(person_id, count_value) as +( + select person_id, COUNT_BIG(distinct condition_concept_id) as num_conditions + from @cdmDatabaseSchema.condition_occurrence + group by person_id +), +overallStats (avg_value, stdev_value, min_value, max_value, total) as +( + select CAST(avg(1.0 * count_value) AS FLOAT) as avg_value, + CAST(stdev(count_value) AS FLOAT) as stdev_value, + min(count_value) as min_value, + max(count_value) as max_value, + count_big(*) as total + from rawData +), +statsView (count_value, total, rn) as +( + select count_value, + count_big(*) as total, + row_number() over (order by count_value) as rn + FROM rawData + group by count_value +), +priorStats (count_value, total, accumulated) as +( + select s.count_value, s.total, sum(p.total) as accumulated + from 
statsView s + join statsView p on p.rn <= s.rn + group by s.count_value, s.total, s.rn +) +select 403 as analysis_id, + o.total as count_value, + o.min_value, + o.max_value, + o.avg_value, + o.stdev_value, + MIN(case when p.accumulated >= .50 * o.total then count_value else o.max_value end) as median_value, + MIN(case when p.accumulated >= .10 * o.total then count_value else o.max_value end) as p10_value, + MIN(case when p.accumulated >= .25 * o.total then count_value else o.max_value end) as p25_value, + MIN(case when p.accumulated >= .75 * o.total then count_value else o.max_value end) as p75_value, + MIN(case when p.accumulated >= .90 * o.total then count_value else o.max_value end) as p90_value +into #tempResults +from priorStats p +CROSS JOIN overallStats o +GROUP BY o.total, o.min_value, o.max_value, o.avg_value, o.stdev_value +; + +--HINT DISTRIBUTE_ON_KEY(count_value) +select analysis_id, +null as stratum_1, null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, +count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_dist_403 +from #tempResults +; + +truncate table #tempResults; +drop table #tempResults; diff --git a/inst/sql/sql_server/analyses/404.sql b/inst/sql/sql_server/analyses/404.sql new file mode 100755 index 00000000..ec39b48b --- /dev/null +++ b/inst/sql/sql_server/analyses/404.sql @@ -0,0 +1,20 @@ +-- 404 Number of persons with at least one condition occurrence, by condition_concept_id by calendar year by gender by age decile + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select 404 as analysis_id, + CAST(co1.condition_concept_id AS VARCHAR(255)) as stratum_1, + CAST(YEAR(condition_start_date) AS VARCHAR(255)) as stratum_2, + CAST(p1.gender_concept_id AS VARCHAR(255)) as stratum_3, + CAST(floor((year(condition_start_date) - p1.year_of_birth)/10) AS VARCHAR(255)) as stratum_4, + null as stratum_5, + COUNT_BIG(distinct p1.PERSON_ID) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_404 +from @cdmDatabaseSchema.PERSON p1 +inner join +@cdmDatabaseSchema.condition_occurrence co1 +on p1.person_id = co1.person_id +group by co1.condition_concept_id, + YEAR(condition_start_date), + p1.gender_concept_id, + floor((year(condition_start_date) - p1.year_of_birth)/10) +; diff --git a/inst/sql/sql_server/analyses/405.sql b/inst/sql/sql_server/analyses/405.sql new file mode 100755 index 00000000..f3c789be --- /dev/null +++ b/inst/sql/sql_server/analyses/405.sql @@ -0,0 +1,14 @@ +-- 405 Number of condition occurrence records, by condition_concept_id by condition_type_concept_id + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select 405 as analysis_id, + CAST(co1.condition_CONCEPT_ID AS VARCHAR(255)) as stratum_1, + CAST(co1.condition_type_concept_id AS VARCHAR(255)) as stratum_2, + null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(co1.PERSON_ID) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_405 +from + @cdmDatabaseSchema.condition_occurrence co1 +group by co1.condition_CONCEPT_ID, + co1.condition_type_concept_id +; diff --git a/inst/sql/sql_server/analyses/406.sql b/inst/sql/sql_server/analyses/406.sql new file mode 100755 index 00000000..c93cc86f --- /dev/null +++ b/inst/sql/sql_server/analyses/406.sql @@ -0,0 +1,74 @@ +-- 406 Distribution of age by condition_concept_id + +--HINT DISTRIBUTE_ON_KEY(subject_id) +select co1.condition_concept_id as subject_id, + p1.gender_concept_id, + 
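Note on the age strata: the deciles in 404 (and in 204, 504, 1804 and 2104) come from floor((year(event_date) - year_of_birth) / 10), so the decile is based on the calendar-year difference rather than the exact age at the event. For example:

-- Example of the decile arithmetic: an event in 2015 for a person born in 1947
select floor((2015 - 1947) / 10) as age_decile;  -- 68-year difference -> decile 6 (ages 60-69)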
(co1.condition_start_year - p1.year_of_birth) as count_value +INTO #rawData_406 +from @cdmDatabaseSchema.PERSON p1 +inner join +( + select person_id, condition_concept_id, min(year(condition_start_date)) as condition_start_year + from @cdmDatabaseSchema.condition_occurrence + group by person_id, condition_concept_id +) co1 on p1.person_id = co1.person_id +; + +--HINT DISTRIBUTE_ON_KEY(stratum1_id) +with overallStats (stratum1_id, stratum2_id, avg_value, stdev_value, min_value, max_value, total) as +( + select subject_id as stratum1_id, + gender_concept_id as stratum2_id, + CAST(avg(1.0 * count_value) AS FLOAT) as avg_value, + CAST(stdev(count_value) AS FLOAT) as stdev_value, + min(count_value) as min_value, + max(count_value) as max_value, + count_big(*) as total + FROM #rawData_406 + group by subject_id, gender_concept_id +), +statsView (stratum1_id, stratum2_id, count_value, total, rn) as +( + select subject_id as stratum1_id, gender_concept_id as stratum2_id, count_value, count_big(*) as total, row_number() over (partition by subject_id, gender_concept_id order by count_value) as rn + FROM #rawData_406 + group by subject_id, gender_concept_id, count_value +), +priorStats (stratum1_id, stratum2_id, count_value, total, accumulated) as +( + select s.stratum1_id, s.stratum2_id, s.count_value, s.total, sum(p.total) as accumulated + from statsView s + join statsView p on s.stratum1_id = p.stratum1_id and s.stratum2_id = p.stratum2_id and p.rn <= s.rn + group by s.stratum1_id, s.stratum2_id, s.count_value, s.total, s.rn +) +select 406 as analysis_id, + CAST(o.stratum1_id AS VARCHAR(255)) AS stratum1_id, + CAST(o.stratum2_id AS VARCHAR(255)) AS stratum2_id, + o.total as count_value, + o.min_value, + o.max_value, + o.avg_value, + o.stdev_value, + MIN(case when p.accumulated >= .50 * o.total then count_value else o.max_value end) as median_value, + MIN(case when p.accumulated >= .10 * o.total then count_value else o.max_value end) as p10_value, + MIN(case when p.accumulated >= .25 * o.total then count_value else o.max_value end) as p25_value, + MIN(case when p.accumulated >= .75 * o.total then count_value else o.max_value end) as p75_value, + MIN(case when p.accumulated >= .90 * o.total then count_value else o.max_value end) as p90_value +INTO #tempResults +from priorStats p +join overallStats o on p.stratum1_id = o.stratum1_id and p.stratum2_id = o.stratum2_id +GROUP BY o.stratum1_id, o.stratum2_id, o.total, o.min_value, o.max_value, o.avg_value, o.stdev_value +; + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select analysis_id, stratum1_id as stratum_1, stratum2_id as stratum_2, +null as stratum_3, null as stratum_4, null as stratum_5, +count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_dist_406 +from #tempResults +; + +truncate table #tempResults; +drop table #tempResults; + +truncate Table #rawData_406; +drop table #rawData_406; diff --git a/inst/sql/sql_server/analyses/409.sql b/inst/sql/sql_server/analyses/409.sql new file mode 100755 index 00000000..cd5434f2 --- /dev/null +++ b/inst/sql/sql_server/analyses/409.sql @@ -0,0 +1,13 @@ +-- 409 Number of condition occurrence records with invalid person_id + + +select 409 as analysis_id, + null as stratum_1, null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(co1.PERSON_ID) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_409 +from + 
@cdmDatabaseSchema.condition_occurrence co1 + left join @cdmDatabaseSchema.PERSON p1 + on p1.person_id = co1.person_id +where p1.person_id is null +; diff --git a/inst/sql/sql_server/analyses/410.sql b/inst/sql/sql_server/analyses/410.sql new file mode 100755 index 00000000..87fa3b5e --- /dev/null +++ b/inst/sql/sql_server/analyses/410.sql @@ -0,0 +1,15 @@ +-- 410 Number of condition occurrence records outside valid observation period + + +select 410 as analysis_id, + null as stratum_1, null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(co1.PERSON_ID) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_410 +from + @cdmDatabaseSchema.condition_occurrence co1 + left join @cdmDatabaseSchema.observation_period op1 + on op1.person_id = co1.person_id + and co1.condition_start_date >= op1.observation_period_start_date + and co1.condition_start_date <= op1.observation_period_end_date +where op1.person_id is null +; diff --git a/inst/sql/sql_server/analyses/411.sql b/inst/sql/sql_server/analyses/411.sql new file mode 100755 index 00000000..80c6ea96 --- /dev/null +++ b/inst/sql/sql_server/analyses/411.sql @@ -0,0 +1,11 @@ +-- 411 Number of condition occurrence records with end date < start date + + +select 411 as analysis_id, + null as stratum_1, null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(co1.PERSON_ID) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_411 +from + @cdmDatabaseSchema.condition_occurrence co1 +where co1.condition_end_date < co1.condition_start_date +; diff --git a/inst/sql/sql_server/analyses/412.sql b/inst/sql/sql_server/analyses/412.sql new file mode 100755 index 00000000..5959e4f7 --- /dev/null +++ b/inst/sql/sql_server/analyses/412.sql @@ -0,0 +1,14 @@ +-- 412 Number of condition occurrence records with invalid provider_id + + +select 412 as analysis_id, + null as stratum_1, null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(co1.PERSON_ID) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_412 +from + @cdmDatabaseSchema.condition_occurrence co1 + left join @cdmDatabaseSchema.provider p1 + on p1.provider_id = co1.provider_id +where co1.provider_id is not null + and p1.provider_id is null +; diff --git a/inst/sql/sql_server/analyses/413.sql b/inst/sql/sql_server/analyses/413.sql new file mode 100755 index 00000000..99c64c7e --- /dev/null +++ b/inst/sql/sql_server/analyses/413.sql @@ -0,0 +1,14 @@ +-- 413 Number of condition occurrence records with invalid visit_id + + +select 413 as analysis_id, + null as stratum_1, null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(co1.PERSON_ID) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_413 +from + @cdmDatabaseSchema.condition_occurrence co1 + left join @cdmDatabaseSchema.visit_occurrence vo1 + on co1.visit_occurrence_id = vo1.visit_occurrence_id +where co1.visit_occurrence_id is not null + and vo1.visit_occurrence_id is null +; diff --git a/inst/sql/sql_server/analyses/420.sql b/inst/sql/sql_server/analyses/420.sql new file mode 100755 index 00000000..bba02a9f --- /dev/null +++ b/inst/sql/sql_server/analyses/420.sql @@ -0,0 +1,12 @@ +-- 420 Number of condition occurrence records by condition occurrence start month + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select 420 as analysis_id, + CAST(YEAR(condition_start_date)*100 + month(condition_start_date) AS VARCHAR(255)) as stratum_1, + null as 
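Note on the data-quality checks: analyses 409-413, like 207-210 and 1809-1813 earlier in this changeset, count orphaned or out-of-range records with a left join plus an IS NULL filter; the "is not null" guard on the foreign key keeps rows that legitimately omit the reference from being flagged. Reduced to its core (aliases generic, mirroring analysis 412):

-- Generic shape of the invalid-foreign-key checks
select count_big(child.person_id) as count_value
from @cdmDatabaseSchema.condition_occurrence child
  left join @cdmDatabaseSchema.provider parent
    on parent.provider_id = child.provider_id
where child.provider_id is not null  -- the row claims a provider...
  and parent.provider_id is null;    -- ...but no such provider exists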
stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(PERSON_ID) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_420 +from +@cdmDatabaseSchema.condition_occurrence co1 +group by YEAR(condition_start_date)*100 + month(condition_start_date) +; diff --git a/inst/sql/sql_server/analyses/5.sql b/inst/sql/sql_server/analyses/5.sql new file mode 100755 index 00000000..eb895ac9 --- /dev/null +++ b/inst/sql/sql_server/analyses/5.sql @@ -0,0 +1,9 @@ +-- 5 Number of persons by ethnicity + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select 5 as analysis_id, CAST(ETHNICITY_CONCEPT_ID AS VARCHAR(255)) as stratum_1, +null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, +COUNT_BIG(distinct person_id) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_5 +from @cdmDatabaseSchema.PERSON +group by ETHNICITY_CONCEPT_ID; diff --git a/inst/sql/sql_server/analyses/500.sql b/inst/sql/sql_server/analyses/500.sql new file mode 100755 index 00000000..10a3edc8 --- /dev/null +++ b/inst/sql/sql_server/analyses/500.sql @@ -0,0 +1,12 @@ +-- 500 Number of persons with death, by cause_concept_id + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select 500 as analysis_id, + CAST(d1.cause_concept_id AS VARCHAR(255)) as stratum_1, + null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(distinct d1.PERSON_ID) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_500 +from + @cdmDatabaseSchema.death d1 +group by d1.cause_concept_id +; diff --git a/inst/sql/sql_server/analyses/501.sql b/inst/sql/sql_server/analyses/501.sql new file mode 100755 index 00000000..0bdd984e --- /dev/null +++ b/inst/sql/sql_server/analyses/501.sql @@ -0,0 +1,12 @@ +-- 501 Number of records of death, by cause_concept_id + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select 501 as analysis_id, + CAST(d1.cause_concept_id AS VARCHAR(255)) as stratum_1, + null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(d1.PERSON_ID) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_501 +from + @cdmDatabaseSchema.death d1 +group by d1.cause_concept_id +; diff --git a/inst/sql/sql_server/analyses/502.sql b/inst/sql/sql_server/analyses/502.sql new file mode 100755 index 00000000..225eb7e6 --- /dev/null +++ b/inst/sql/sql_server/analyses/502.sql @@ -0,0 +1,12 @@ +-- 502 Number of persons by death month + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select 502 as analysis_id, + CAST(YEAR(death_date)*100 + month(death_date) AS VARCHAR(255)) as stratum_1, + null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(distinct PERSON_ID) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_502 +from +@cdmDatabaseSchema.death d1 +group by YEAR(death_date)*100 + month(death_date) +; diff --git a/inst/sql/sql_server/analyses/504.sql b/inst/sql/sql_server/analyses/504.sql new file mode 100755 index 00000000..afd2b713 --- /dev/null +++ b/inst/sql/sql_server/analyses/504.sql @@ -0,0 +1,18 @@ +-- 504 Number of persons with a death, by calendar year by gender by age decile + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select 504 as analysis_id, + CAST(YEAR(death_date) AS VARCHAR(255)) as stratum_1, + CAST(p1.gender_concept_id AS VARCHAR(255)) as stratum_2, + CAST(floor((year(death_date) - p1.year_of_birth)/10) AS VARCHAR(255)) as stratum_3, + null as stratum_4, null as stratum_5, + COUNT_BIG(distinct p1.PERSON_ID) as count_value +into 
@scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_504 +from @cdmDatabaseSchema.PERSON p1 +inner join +@cdmDatabaseSchema.death d1 +on p1.person_id = d1.person_id +group by YEAR(death_date), + p1.gender_concept_id, + floor((year(death_date) - p1.year_of_birth)/10) +; diff --git a/inst/sql/sql_server/analyses/505.sql b/inst/sql/sql_server/analyses/505.sql new file mode 100755 index 00000000..685fe13b --- /dev/null +++ b/inst/sql/sql_server/analyses/505.sql @@ -0,0 +1,12 @@ +-- 505 Number of death records, by death_type_concept_id + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select 505 as analysis_id, + CAST(death_type_concept_id AS VARCHAR(255)) as stratum_1, + null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(PERSON_ID) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_505 +from + @cdmDatabaseSchema.death d1 +group by death_type_concept_id +; diff --git a/inst/sql/sql_server/analyses/506.sql b/inst/sql/sql_server/analyses/506.sql new file mode 100755 index 00000000..ee04a491 --- /dev/null +++ b/inst/sql/sql_server/analyses/506.sql @@ -0,0 +1,69 @@ +-- 506 Distribution of age at death by gender + + +--HINT DISTRIBUTE_ON_KEY(stratum_id) +with rawData(stratum_id, count_value) as +( + select p1.gender_concept_id, + d1.death_year - p1.year_of_birth as count_value + from @cdmDatabaseSchema.PERSON p1 + inner join + (select person_id, min(year(death_date)) as death_year + from @cdmDatabaseSchema.death + group by person_id + ) d1 + on p1.person_id = d1.person_id +), +overallStats (stratum_id, avg_value, stdev_value, min_value, max_value, total) as +( + select stratum_id, + CAST(avg(1.0 * count_value) AS FLOAT) as avg_value, + CAST(stdev(count_value) AS FLOAT) as stdev_value, + min(count_value) as min_value, + max(count_value) as max_value, + count_big(*) as total + FROM rawData + group by stratum_id +), +statsView (stratum_id, count_value, total, rn) as +( + select stratum_id, count_value, count_big(*) as total, row_number() over (order by count_value) as rn + FROM rawData + group by stratum_id, count_value +), +priorStats (stratum_id, count_value, total, accumulated) as +( + select s.stratum_id, s.count_value, s.total, sum(p.total) as accumulated + from statsView s + join statsView p on s.stratum_id = p.stratum_id and p.rn <= s.rn + group by s.stratum_id, s.count_value, s.total, s.rn +) +select 506 as analysis_id, + CAST(o.stratum_id AS VARCHAR(255)) AS stratum_id, + o.total as count_value, + o.min_value, + o.max_value, + o.avg_value, + o.stdev_value, + MIN(case when p.accumulated >= .50 * o.total then count_value else o.max_value end) as median_value, + MIN(case when p.accumulated >= .10 * o.total then count_value else o.max_value end) as p10_value, + MIN(case when p.accumulated >= .25 * o.total then count_value else o.max_value end) as p25_value, + MIN(case when p.accumulated >= .75 * o.total then count_value else o.max_value end) as p75_value, + MIN(case when p.accumulated >= .90 * o.total then count_value else o.max_value end) as p90_value +into #tempResults +from priorStats p +join overallStats o on p.stratum_id = o.stratum_id +GROUP BY o.stratum_id, o.total, o.min_value, o.max_value, o.avg_value, o.stdev_value +; + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select analysis_id, stratum_id as stratum_1, +null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, +count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value +into 
@scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_dist_506 +from #tempResults +; + +truncate table #tempResults; + +drop table #tempResults; diff --git a/inst/sql/sql_server/analyses/509.sql b/inst/sql/sql_server/analyses/509.sql new file mode 100755 index 00000000..ebfbb592 --- /dev/null +++ b/inst/sql/sql_server/analyses/509.sql @@ -0,0 +1,13 @@ +-- 509 Number of death records with invalid person_id + + +select 509 as analysis_id, + null as stratum_1, null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(d1.PERSON_ID) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_509 +from + @cdmDatabaseSchema.death d1 + left join @cdmDatabaseSchema.person p1 + on d1.person_id = p1.person_id +where p1.person_id is null +; diff --git a/inst/sql/sql_server/analyses/510.sql b/inst/sql/sql_server/analyses/510.sql new file mode 100755 index 00000000..2c6378cc --- /dev/null +++ b/inst/sql/sql_server/analyses/510.sql @@ -0,0 +1,15 @@ +-- 510 Number of death records outside valid observation period + + +select 510 as analysis_id, + null as stratum_1, null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(d1.PERSON_ID) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_510 +from + @cdmDatabaseSchema.death d1 + left join @cdmDatabaseSchema.observation_period op1 + on d1.person_id = op1.person_id + and d1.death_date >= op1.observation_period_start_date + and d1.death_date <= op1.observation_period_end_date +where op1.person_id is null +; diff --git a/inst/sql/sql_server/analyses/511.sql b/inst/sql/sql_server/analyses/511.sql new file mode 100755 index 00000000..a4b0ad70 --- /dev/null +++ b/inst/sql/sql_server/analyses/511.sql @@ -0,0 +1,29 @@ +-- 511 Distribution of time from death to last condition + +--HINT DISTRIBUTE_ON_KEY(count_value) +select 511 as analysis_id, + null as stratum_1, null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(count_value) as count_value, + min(count_value) as min_value, + max(count_value) as max_value, + CAST(avg(1.0*count_value) AS FLOAT) as avg_value, + CAST(stdev(count_value) AS FLOAT) as stdev_value, + max(case when p1<=0.50 then count_value else -9999 end) as median_value, + max(case when p1<=0.10 then count_value else -9999 end) as p10_value, + max(case when p1<=0.25 then count_value else -9999 end) as p25_value, + max(case when p1<=0.75 then count_value else -9999 end) as p75_value, + max(case when p1<=0.90 then count_value else -9999 end) as p90_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_dist_511 +from +( +select datediff(dd,d1.death_date, t0.max_date) as count_value, + 1.0*(row_number() over (order by datediff(dd,d1.death_date, t0.max_date)))/(COUNT_BIG(*) over () + 1) as p1 +from @cdmDatabaseSchema.death d1 + inner join + ( + select person_id, max(condition_start_date) as max_date + from @cdmDatabaseSchema.condition_occurrence + group by person_id + ) t0 on d1.person_id = t0.person_id +) t1 +; diff --git a/inst/sql/sql_server/analyses/512.sql b/inst/sql/sql_server/analyses/512.sql new file mode 100755 index 00000000..510a6a7b --- /dev/null +++ b/inst/sql/sql_server/analyses/512.sql @@ -0,0 +1,67 @@ +-- 512 Distribution of time from death to last drug + +--HINT DISTRIBUTE_ON_KEY(count_value) +with rawData(count_value) as +( + select datediff(dd,d1.death_date, t0.max_date) as count_value + from @cdmDatabaseSchema.death d1 + inner join + ( + select person_id, max(drug_exposure_start_date) as 
max_date + from @cdmDatabaseSchema.drug_exposure + group by person_id + ) t0 + on d1.person_id = t0.person_id +), +overallStats (avg_value, stdev_value, min_value, max_value, total) as +( + select CAST(avg(1.0 * count_value) AS FLOAT) as avg_value, + CAST(stdev(count_value) AS FLOAT) as stdev_value, + min(count_value) as min_value, + max(count_value) as max_value, + count_big(*) as total + from rawData +), +statsView (count_value, total, rn) as +( + select count_value, + count_big(*) as total, + row_number() over (order by count_value) as rn + FROM rawData + group by count_value +), +priorStats (count_value, total, accumulated) as +( + select s.count_value, s.total, sum(p.total) as accumulated + from statsView s + join statsView p on p.rn <= s.rn + group by s.count_value, s.total, s.rn +) +select 512 as analysis_id, + o.total as count_value, + o.min_value, + o.max_value, + o.avg_value, + o.stdev_value, + MIN(case when p.accumulated >= .50 * o.total then count_value else o.max_value end) as median_value, + MIN(case when p.accumulated >= .10 * o.total then count_value else o.max_value end) as p10_value, + MIN(case when p.accumulated >= .25 * o.total then count_value else o.max_value end) as p25_value, + MIN(case when p.accumulated >= .75 * o.total then count_value else o.max_value end) as p75_value, + MIN(case when p.accumulated >= .90 * o.total then count_value else o.max_value end) as p90_value +into #tempResults +from priorStats p +CROSS JOIN overallStats o +GROUP BY o.total, o.min_value, o.max_value, o.avg_value, o.stdev_value +; + +--HINT DISTRIBUTE_ON_KEY(count_value) +select analysis_id, +null as stratum_1, null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, +count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_dist_512 +FROM #tempResults +; + +truncate table #tempResults; + +drop table #tempResults; diff --git a/inst/sql/sql_server/analyses/513.sql b/inst/sql/sql_server/analyses/513.sql new file mode 100755 index 00000000..a75374b7 --- /dev/null +++ b/inst/sql/sql_server/analyses/513.sql @@ -0,0 +1,67 @@ +-- 513 Distribution of time from death to last visit + +--HINT DISTRIBUTE_ON_KEY(count_value) +with rawData(count_value) as +( + select datediff(dd,d1.death_date, t0.max_date) as count_value + from @cdmDatabaseSchema.death d1 + inner join + ( + select person_id, max(visit_start_date) as max_date + from @cdmDatabaseSchema.visit_occurrence + group by person_id + ) t0 + on d1.person_id = t0.person_id +), +overallStats (avg_value, stdev_value, min_value, max_value, total) as +( + select CAST(avg(1.0 * count_value) AS FLOAT) as avg_value, + CAST(stdev(count_value) AS FLOAT) as stdev_value, + min(count_value) as min_value, + max(count_value) as max_value, + count_big(*) as total + from rawData +), +statsView (count_value, total, rn) as +( + select count_value, + count_big(*) as total, + row_number() over (order by count_value) as rn + FROM rawData + group by count_value +), +priorStats (count_value, total, accumulated) as +( + select s.count_value, s.total, sum(p.total) as accumulated + from statsView s + join statsView p on p.rn <= s.rn + group by s.count_value, s.total, s.rn +) +select 513 as analysis_id, + o.total as count_value, + o.min_value, + o.max_value, + o.avg_value, + o.stdev_value, + MIN(case when p.accumulated >= .50 * o.total then count_value else o.max_value end) as median_value, + MIN(case when p.accumulated >= .10 * o.total 
then count_value else o.max_value end) as p10_value, + MIN(case when p.accumulated >= .25 * o.total then count_value else o.max_value end) as p25_value, + MIN(case when p.accumulated >= .75 * o.total then count_value else o.max_value end) as p75_value, + MIN(case when p.accumulated >= .90 * o.total then count_value else o.max_value end) as p90_value +into #tempResults +from priorStats p +CROSS JOIN overallStats o +GROUP BY o.total, o.min_value, o.max_value, o.avg_value, o.stdev_value +; + +--HINT DISTRIBUTE_ON_KEY(count_value) +select analysis_id, +null as stratum_1, null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, +count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_dist_513 +from #tempResults +; + +truncate table #tempResults; + +drop table #tempResults; diff --git a/inst/sql/sql_server/analyses/514.sql b/inst/sql/sql_server/analyses/514.sql new file mode 100755 index 00000000..77630c7a --- /dev/null +++ b/inst/sql/sql_server/analyses/514.sql @@ -0,0 +1,67 @@ +-- 514 Distribution of time from death to last procedure + +--HINT DISTRIBUTE_ON_KEY(count_value) +with rawData(count_value) as +( + select datediff(dd,d1.death_date, t0.max_date) as count_value + from @cdmDatabaseSchema.death d1 + inner join + ( + select person_id, max(procedure_date) as max_date + from @cdmDatabaseSchema.procedure_occurrence + group by person_id + ) t0 + on d1.person_id = t0.person_id +), +overallStats (avg_value, stdev_value, min_value, max_value, total) as +( + select CAST(avg(1.0 * count_value) AS FLOAT) as avg_value, + CAST(stdev(count_value) AS FLOAT) as stdev_value, + min(count_value) as min_value, + max(count_value) as max_value, + count_big(*) as total + from rawData +), +statsView (count_value, total, rn) as +( + select count_value, + count_big(*) as total, + row_number() over (order by count_value) as rn + FROM rawData + group by count_value +), +priorStats (count_value, total, accumulated) as +( + select s.count_value, s.total, sum(p.total) as accumulated + from statsView s + join statsView p on p.rn <= s.rn + group by s.count_value, s.total, s.rn +) +select 514 as analysis_id, + o.total as count_value, + o.min_value, + o.max_value, + o.avg_value, + o.stdev_value, + MIN(case when p.accumulated >= .50 * o.total then count_value else o.max_value end) as median_value, + MIN(case when p.accumulated >= .10 * o.total then count_value else o.max_value end) as p10_value, + MIN(case when p.accumulated >= .25 * o.total then count_value else o.max_value end) as p25_value, + MIN(case when p.accumulated >= .75 * o.total then count_value else o.max_value end) as p75_value, + MIN(case when p.accumulated >= .90 * o.total then count_value else o.max_value end) as p90_value +into #tempResults +from priorStats p +CROSS JOIN overallStats o +GROUP BY o.total, o.min_value, o.max_value, o.avg_value, o.stdev_value +; + +--HINT DISTRIBUTE_ON_KEY(count_value) +select analysis_id, +null as stratum_1, null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, +count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_dist_514 +from #tempResults +; + +truncate table #tempResults; + +drop table #tempResults; diff --git a/inst/sql/sql_server/analyses/515.sql b/inst/sql/sql_server/analyses/515.sql new file mode 100755 index 00000000..59e281d4 --- 
/dev/null +++ b/inst/sql/sql_server/analyses/515.sql @@ -0,0 +1,66 @@ +-- 515 Distribution of time from death to last observation + +--HINT DISTRIBUTE_ON_KEY(count_value) +with rawData(count_value) as +( + select datediff(dd,d1.death_date, t0.max_date) as count_value + from @cdmDatabaseSchema.death d1 + inner join + ( + select person_id, max(observation_date) as max_date + from @cdmDatabaseSchema.observation + group by person_id + ) t0 + on d1.person_id = t0.person_id +), +overallStats (avg_value, stdev_value, min_value, max_value, total) as +( + select CAST(avg(1.0 * count_value) AS FLOAT) as avg_value, + CAST(stdev(count_value) AS FLOAT) as stdev_value, + min(count_value) as min_value, + max(count_value) as max_value, + count_big(*) as total + from rawData +), +statsView (count_value, total, rn) as +( + select count_value, + count_big(*) as total, + row_number() over (order by count_value) as rn + FROM rawData + group by count_value +), +priorStats (count_value, total, accumulated) as +( + select s.count_value, s.total, sum(p.total) as accumulated + from statsView s + join statsView p on p.rn <= s.rn + group by s.count_value, s.total, s.rn +) +select 515 as analysis_id, + o.total as count_value, + o.min_value, + o.max_value, + o.avg_value, + o.stdev_value, + MIN(case when p.accumulated >= .50 * o.total then count_value else o.max_value end) as median_value, + MIN(case when p.accumulated >= .10 * o.total then count_value else o.max_value end) as p10_value, + MIN(case when p.accumulated >= .25 * o.total then count_value else o.max_value end) as p25_value, + MIN(case when p.accumulated >= .75 * o.total then count_value else o.max_value end) as p75_value, + MIN(case when p.accumulated >= .90 * o.total then count_value else o.max_value end) as p90_value +into #tempResults +from priorStats p +CROSS JOIN overallStats o +GROUP BY o.total, o.min_value, o.max_value, o.avg_value, o.stdev_value +; + +--HINT DISTRIBUTE_ON_KEY(count_value) +select analysis_id, +null as stratum_1, null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, +count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_dist_515 +from #tempResults +; + +truncate table #tempResults; +drop table #tempResults; diff --git a/inst/sql/sql_server/analyses/600.sql b/inst/sql/sql_server/analyses/600.sql new file mode 100755 index 00000000..861bb9ab --- /dev/null +++ b/inst/sql/sql_server/analyses/600.sql @@ -0,0 +1,12 @@ +-- 600 Number of persons with at least one procedure occurrence, by procedure_concept_id + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select 600 as analysis_id, + CAST(po1.procedure_CONCEPT_ID AS VARCHAR(255)) as stratum_1, + null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(distinct po1.PERSON_ID) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_600 +from + @cdmDatabaseSchema.procedure_occurrence po1 +group by po1.procedure_CONCEPT_ID +; diff --git a/inst/sql/sql_server/analyses/601.sql b/inst/sql/sql_server/analyses/601.sql new file mode 100755 index 00000000..5ab193a9 --- /dev/null +++ b/inst/sql/sql_server/analyses/601.sql @@ -0,0 +1,12 @@ +-- 601 Number of procedure occurrence records, by procedure_concept_id + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select 601 as analysis_id, + CAST(po1.procedure_CONCEPT_ID AS VARCHAR(255)) as stratum_1, + null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, + 
COUNT_BIG(po1.PERSON_ID) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_601 +from + @cdmDatabaseSchema.procedure_occurrence po1 +group by po1.procedure_CONCEPT_ID +; diff --git a/inst/sql/sql_server/analyses/602.sql b/inst/sql/sql_server/analyses/602.sql new file mode 100755 index 00000000..f95a1084 --- /dev/null +++ b/inst/sql/sql_server/analyses/602.sql @@ -0,0 +1,14 @@ +-- 602 Number of persons by procedure occurrence start month, by procedure_concept_id + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select 602 as analysis_id, + CAST(po1.procedure_concept_id AS VARCHAR(255)) as stratum_1, + CAST(YEAR(procedure_date)*100 + month(procedure_date) AS VARCHAR(255)) as stratum_2, + null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(distinct PERSON_ID) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_602 +from +@cdmDatabaseSchema.procedure_occurrence po1 +group by po1.procedure_concept_id, + YEAR(procedure_date)*100 + month(procedure_date) +; diff --git a/inst/sql/sql_server/analyses/603.sql b/inst/sql/sql_server/analyses/603.sql new file mode 100755 index 00000000..1d4fa162 --- /dev/null +++ b/inst/sql/sql_server/analyses/603.sql @@ -0,0 +1,60 @@ +-- 603 Number of distinct procedure occurrence concepts per person + +--HINT DISTRIBUTE_ON_KEY(count_value) +with rawData(count_value) as +( + select COUNT_BIG(distinct po.procedure_concept_id) as num_procedures + from @cdmDatabaseSchema.procedure_occurrence po + group by po.person_id +), +overallStats (avg_value, stdev_value, min_value, max_value, total) as +( + select CAST(avg(1.0 * count_value) AS FLOAT) as avg_value, + CAST(stdev(count_value) AS FLOAT) as stdev_value, + min(count_value) as min_value, + max(count_value) as max_value, + count_big(*) as total + from rawData +), +statsView (count_value, total, rn) as +( + select count_value, + count_big(*) as total, + row_number() over (order by count_value) as rn + FROM rawData + group by count_value +), +priorStats (count_value, total, accumulated) as +( + select s.count_value, s.total, sum(p.total) as accumulated + from statsView s + join statsView p on p.rn <= s.rn + group by s.count_value, s.total, s.rn +) +select 603 as analysis_id, + o.total as count_value, + o.min_value, + o.max_value, + o.avg_value, + o.stdev_value, + MIN(case when p.accumulated >= .50 * o.total then count_value else o.max_value end) as median_value, + MIN(case when p.accumulated >= .10 * o.total then count_value else o.max_value end) as p10_value, + MIN(case when p.accumulated >= .25 * o.total then count_value else o.max_value end) as p25_value, + MIN(case when p.accumulated >= .75 * o.total then count_value else o.max_value end) as p75_value, + MIN(case when p.accumulated >= .90 * o.total then count_value else o.max_value end) as p90_value +into #tempResults +from priorStats p +CROSS JOIN overallStats o +GROUP BY o.total, o.min_value, o.max_value, o.avg_value, o.stdev_value +; + +--HINT DISTRIBUTE_ON_KEY(count_value) +select analysis_id, +null as stratum_1, null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, +count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_dist_603 +from #tempResults +; + +truncate table #tempResults; +drop table #tempResults; diff --git a/inst/sql/sql_server/analyses/604.sql b/inst/sql/sql_server/analyses/604.sql new file mode 100755 index 00000000..9ba00225 --- /dev/null +++ 
b/inst/sql/sql_server/analyses/604.sql @@ -0,0 +1,20 @@ +-- 604 Number of persons with at least one procedure occurrence, by procedure_concept_id by calendar year by gender by age decile + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select 604 as analysis_id, + CAST(po1.procedure_concept_id AS VARCHAR(255)) as stratum_1, + CAST(YEAR(procedure_date) AS VARCHAR(255)) as stratum_2, + CAST(p1.gender_concept_id AS VARCHAR(255)) as stratum_3, + CAST(floor((year(procedure_date) - p1.year_of_birth)/10) AS VARCHAR(255)) as stratum_4, + null as stratum_5, + COUNT_BIG(distinct p1.PERSON_ID) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_604 +from @cdmDatabaseSchema.PERSON p1 +inner join +@cdmDatabaseSchema.procedure_occurrence po1 +on p1.person_id = po1.person_id +group by po1.procedure_concept_id, + YEAR(procedure_date), + p1.gender_concept_id, + floor((year(procedure_date) - p1.year_of_birth)/10) +; diff --git a/inst/sql/sql_server/analyses/605.sql b/inst/sql/sql_server/analyses/605.sql new file mode 100755 index 00000000..855ae2b4 --- /dev/null +++ b/inst/sql/sql_server/analyses/605.sql @@ -0,0 +1,14 @@ +-- 605 Number of procedure occurrence records, by procedure_concept_id by procedure_type_concept_id + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select 605 as analysis_id, + CAST(po1.procedure_CONCEPT_ID AS VARCHAR(255)) as stratum_1, + CAST(po1.procedure_type_concept_id AS VARCHAR(255)) as stratum_2, + null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(po1.PERSON_ID) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_605 +from + @cdmDatabaseSchema.procedure_occurrence po1 +group by po1.procedure_CONCEPT_ID, + po1.procedure_type_concept_id +; diff --git a/inst/sql/sql_server/analyses/606.sql b/inst/sql/sql_server/analyses/606.sql new file mode 100755 index 00000000..a92f077e --- /dev/null +++ b/inst/sql/sql_server/analyses/606.sql @@ -0,0 +1,73 @@ +-- 606 Distribution of age by procedure_concept_id + +--HINT DISTRIBUTE_ON_KEY(subject_id) +select po1.procedure_concept_id as subject_id, + p1.gender_concept_id, + po1.procedure_start_year - p1.year_of_birth as count_value +INTO #rawData_606 +from @cdmDatabaseSchema.PERSON p1 +inner join +( + select person_id, procedure_concept_id, min(year(procedure_date)) as procedure_start_year + from @cdmDatabaseSchema.procedure_occurrence + group by person_id, procedure_concept_id +) po1 on p1.person_id = po1.person_id +; + +--HINT DISTRIBUTE_ON_KEY(stratum1_id) +with overallStats (stratum1_id, stratum2_id, avg_value, stdev_value, min_value, max_value, total) as +( + select subject_id as stratum1_id, + gender_concept_id as stratum2_id, + CAST(avg(1.0 * count_value) AS FLOAT) as avg_value, + CAST(stdev(count_value) AS FLOAT) as stdev_value, + min(count_value) as min_value, + max(count_value) as max_value, + count_big(*) as total + FROM #rawData_606 + group by subject_id, gender_concept_id +), +statsView (stratum1_id, stratum2_id, count_value, total, rn) as +( + select subject_id as stratum1_id, gender_concept_id as stratum2_id, count_value, count_big(*) as total, row_number() over (partition by subject_id, gender_concept_id order by count_value) as rn + FROM #rawData_606 + group by subject_id, gender_concept_id, count_value +), +priorStats (stratum1_id, stratum2_id, count_value, total, accumulated) as +( + select s.stratum1_id, s.stratum2_id, s.count_value, s.total, sum(p.total) as accumulated + from statsView s + join statsView p on s.stratum1_id = p.stratum1_id and s.stratum2_id = p.stratum2_id and p.rn 
<= s.rn + group by s.stratum1_id, s.stratum2_id, s.count_value, s.total, s.rn +) +select 606 as analysis_id, + CAST(o.stratum1_id AS VARCHAR(255)) AS stratum1_id, + CAST(o.stratum2_id AS VARCHAR(255)) AS stratum2_id, + o.total as count_value, + o.min_value, + o.max_value, + o.avg_value, + o.stdev_value, + MIN(case when p.accumulated >= .50 * o.total then count_value else o.max_value end) as median_value, + MIN(case when p.accumulated >= .10 * o.total then count_value else o.max_value end) as p10_value, + MIN(case when p.accumulated >= .25 * o.total then count_value else o.max_value end) as p25_value, + MIN(case when p.accumulated >= .75 * o.total then count_value else o.max_value end) as p75_value, + MIN(case when p.accumulated >= .90 * o.total then count_value else o.max_value end) as p90_value +into #tempResults +from priorStats p +join overallStats o on p.stratum1_id = o.stratum1_id and p.stratum2_id = o.stratum2_id +GROUP BY o.stratum1_id, o.stratum2_id, o.total, o.min_value, o.max_value, o.avg_value, o.stdev_value +; + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select analysis_id, stratum1_id as stratum_1, stratum2_id as stratum_2, +null as stratum_3, null as stratum_4, null as stratum_5, +count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_dist_606 +from #tempResults +; + +truncate table #tempResults; +drop table #tempResults; +truncate table #rawData_606; +drop table #rawData_606; diff --git a/inst/sql/sql_server/analyses/609.sql b/inst/sql/sql_server/analyses/609.sql new file mode 100755 index 00000000..fa976d17 --- /dev/null +++ b/inst/sql/sql_server/analyses/609.sql @@ -0,0 +1,13 @@ +-- 609 Number of procedure occurrence records with invalid person_id + + +select 609 as analysis_id, + null as stratum_1, null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(po1.PERSON_ID) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_609 +from + @cdmDatabaseSchema.procedure_occurrence po1 + left join @cdmDatabaseSchema.PERSON p1 + on p1.person_id = po1.person_id +where p1.person_id is null +; diff --git a/inst/sql/sql_server/analyses/610.sql b/inst/sql/sql_server/analyses/610.sql new file mode 100755 index 00000000..98ff3ed9 --- /dev/null +++ b/inst/sql/sql_server/analyses/610.sql @@ -0,0 +1,15 @@ +-- 610 Number of procedure occurrence records outside valid observation period + + +select 610 as analysis_id, + null as stratum_1, null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(po1.PERSON_ID) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_610 +from + @cdmDatabaseSchema.procedure_occurrence po1 + left join @cdmDatabaseSchema.observation_period op1 + on op1.person_id = po1.person_id + and po1.procedure_date >= op1.observation_period_start_date + and po1.procedure_date <= op1.observation_period_end_date +where op1.person_id is null +; diff --git a/inst/sql/sql_server/analyses/612.sql b/inst/sql/sql_server/analyses/612.sql new file mode 100755 index 00000000..55a284c2 --- /dev/null +++ b/inst/sql/sql_server/analyses/612.sql @@ -0,0 +1,14 @@ +-- 612 Number of procedure occurrence records with invalid provider_id + + +select 612 as analysis_id, + null as stratum_1, null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(po1.PERSON_ID) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_612 +from + 
@cdmDatabaseSchema.procedure_occurrence po1 + left join @cdmDatabaseSchema.provider p1 + on p1.provider_id = po1.provider_id +where po1.provider_id is not null + and p1.provider_id is null +; diff --git a/inst/sql/sql_server/analyses/613.sql b/inst/sql/sql_server/analyses/613.sql new file mode 100755 index 00000000..5e0ce59d --- /dev/null +++ b/inst/sql/sql_server/analyses/613.sql @@ -0,0 +1,14 @@ +-- 613 Number of procedure occurrence records with invalid visit_id + + +select 613 as analysis_id, + null as stratum_1, null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(po1.PERSON_ID) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_613 +from + @cdmDatabaseSchema.procedure_occurrence po1 + left join @cdmDatabaseSchema.visit_occurrence vo1 + on po1.visit_occurrence_id = vo1.visit_occurrence_id +where po1.visit_occurrence_id is not null + and vo1.visit_occurrence_id is null +; diff --git a/inst/sql/sql_server/analyses/620.sql b/inst/sql/sql_server/analyses/620.sql new file mode 100755 index 00000000..350d218f --- /dev/null +++ b/inst/sql/sql_server/analyses/620.sql @@ -0,0 +1,12 @@ +-- 620 Number of procedure occurrence records by procedure occurrence start month + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select 620 as analysis_id, + CAST(YEAR(procedure_date)*100 + month(procedure_date) AS VARCHAR(255)) as stratum_1, + null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(PERSON_ID) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_620 +from +@cdmDatabaseSchema.procedure_occurrence po1 +group by YEAR(procedure_date)*100 + month(procedure_date) +; diff --git a/inst/sql/sql_server/analyses/691.sql b/inst/sql/sql_server/analyses/691.sql new file mode 100755 index 00000000..8b045818 --- /dev/null +++ b/inst/sql/sql_server/analyses/691.sql @@ -0,0 +1,22 @@ +-- 691 Number of persons that have at least x procedures + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select 691 as analysis_id, + CAST(procedure_concept_id as varchar(255)) as stratum_1, + CAST(prc_cnt as varchar(255)) as stratum_2, + null as stratum_3, + null as stratum_4, + null as stratum_5, + sum(count(person_id)) over (partition by procedure_concept_id order by prc_cnt desc) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_691 +from +( + select + p.procedure_concept_id, + count(p.procedure_occurrence_id) as prc_cnt, + p.person_id + from @cdmDatabaseSchema.procedure_occurrence p + group by p.person_id, p.procedure_concept_id +) cnt_q +group by cnt_q.procedure_concept_id, cnt_q.prc_cnt +; diff --git a/inst/sql/sql_server/analyses/7.sql b/inst/sql/sql_server/analyses/7.sql new file mode 100755 index 00000000..4364985f --- /dev/null +++ b/inst/sql/sql_server/analyses/7.sql @@ -0,0 +1,12 @@ +-- 7 Number of persons with invalid provider_id + +select 7 as analysis_id, +null as stratum_1, null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, +COUNT_BIG(p1.person_id) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_7 +from @cdmDatabaseSchema.PERSON p1 + left join @cdmDatabaseSchema.provider pr1 + on p1.provider_id = pr1.provider_id +where p1.provider_id is not null + and pr1.provider_id is null +; diff --git a/inst/sql/sql_server/analyses/700.sql b/inst/sql/sql_server/analyses/700.sql new file mode 100755 index 00000000..2012508f --- /dev/null +++ b/inst/sql/sql_server/analyses/700.sql @@ -0,0 +1,12 @@ +-- 700 Number of persons with at least one drug occurrence, by 
drug_concept_id + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select 700 as analysis_id, + CAST(de1.drug_CONCEPT_ID AS VARCHAR(255)) as stratum_1, + null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(distinct de1.PERSON_ID) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_700 +from + @cdmDatabaseSchema.drug_exposure de1 +group by de1.drug_CONCEPT_ID +; diff --git a/inst/sql/sql_server/analyses/701.sql b/inst/sql/sql_server/analyses/701.sql new file mode 100755 index 00000000..242b0f7a --- /dev/null +++ b/inst/sql/sql_server/analyses/701.sql @@ -0,0 +1,12 @@ +-- 701 Number of drug occurrence records, by drug_concept_id + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select 701 as analysis_id, + CAST(de1.drug_CONCEPT_ID AS VARCHAR(255)) as stratum_1, + null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(de1.PERSON_ID) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_701 +from + @cdmDatabaseSchema.drug_exposure de1 +group by de1.drug_CONCEPT_ID +; diff --git a/inst/sql/sql_server/analyses/702.sql b/inst/sql/sql_server/analyses/702.sql new file mode 100755 index 00000000..1ffd580a --- /dev/null +++ b/inst/sql/sql_server/analyses/702.sql @@ -0,0 +1,14 @@ +-- 702 Number of persons by drug occurrence start month, by drug_concept_id + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select 702 as analysis_id, + CAST(de1.drug_concept_id AS VARCHAR(255)) as stratum_1, + CAST(YEAR(drug_exposure_start_date)*100 + month(drug_exposure_start_date) AS VARCHAR(255)) as stratum_2, + null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(distinct PERSON_ID) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_702 +from +@cdmDatabaseSchema.drug_exposure de1 +group by de1.drug_concept_id, + YEAR(drug_exposure_start_date)*100 + month(drug_exposure_start_date) +; diff --git a/inst/sql/sql_server/analyses/703.sql b/inst/sql/sql_server/analyses/703.sql new file mode 100755 index 00000000..98e30e76 --- /dev/null +++ b/inst/sql/sql_server/analyses/703.sql @@ -0,0 +1,65 @@ +-- 703 Number of distinct drug exposure concepts per person + +--HINT DISTRIBUTE_ON_KEY(count_value) +with rawData(count_value) as +( + select num_drugs as count_value + from + ( + select de1.person_id, COUNT_BIG(distinct de1.drug_concept_id) as num_drugs + from + @cdmDatabaseSchema.drug_exposure de1 + group by de1.person_id + ) t0 +), +overallStats (avg_value, stdev_value, min_value, max_value, total) as +( + select CAST(avg(1.0 * count_value) AS FLOAT) as avg_value, + CAST(stdev(count_value) AS FLOAT) as stdev_value, + min(count_value) as min_value, + max(count_value) as max_value, + count_big(*) as total + from rawData +), +statsView (count_value, total, rn) as +( + select count_value, + count_big(*) as total, + row_number() over (order by count_value) as rn + FROM rawData + group by count_value +), +priorStats (count_value, total, accumulated) as +( + select s.count_value, s.total, sum(p.total) as accumulated + from statsView s + join statsView p on p.rn <= s.rn + group by s.count_value, s.total, s.rn +) +select 703 as analysis_id, + o.total as count_value, + o.min_value, + o.max_value, + o.avg_value, + o.stdev_value, + MIN(case when p.accumulated >= .50 * o.total then count_value else o.max_value end) as median_value, + MIN(case when p.accumulated >= .10 * o.total then count_value else o.max_value end) as p10_value, + MIN(case when p.accumulated >= .25 * o.total then count_value else o.max_value end) as 
p25_value, + MIN(case when p.accumulated >= .75 * o.total then count_value else o.max_value end) as p75_value, + MIN(case when p.accumulated >= .90 * o.total then count_value else o.max_value end) as p90_value +into #tempResults +from priorStats p +CROSS JOIN overallStats o +GROUP BY o.total, o.min_value, o.max_value, o.avg_value, o.stdev_value +; + +--HINT DISTRIBUTE_ON_KEY(count_value) +select analysis_id, +null as stratum_1, null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, +count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_dist_703 +from #tempResults +; + +truncate table #tempResults; +drop table #tempResults; diff --git a/inst/sql/sql_server/analyses/704.sql b/inst/sql/sql_server/analyses/704.sql new file mode 100755 index 00000000..fcbb1a1a --- /dev/null +++ b/inst/sql/sql_server/analyses/704.sql @@ -0,0 +1,20 @@ +-- 704 Number of persons with at least one drug occurrence, by drug_concept_id by calendar year by gender by age decile + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select 704 as analysis_id, + CAST(de1.drug_concept_id AS VARCHAR(255)) as stratum_1, + CAST(YEAR(drug_exposure_start_date) AS VARCHAR(255)) as stratum_2, + CAST(p1.gender_concept_id AS VARCHAR(255)) as stratum_3, + CAST(floor((year(drug_exposure_start_date) - p1.year_of_birth)/10) AS VARCHAR(255)) as stratum_4, + null as stratum_5, + COUNT_BIG(distinct p1.PERSON_ID) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_704 +from @cdmDatabaseSchema.PERSON p1 +inner join +@cdmDatabaseSchema.drug_exposure de1 +on p1.person_id = de1.person_id +group by de1.drug_concept_id, + YEAR(drug_exposure_start_date), + p1.gender_concept_id, + floor((year(drug_exposure_start_date) - p1.year_of_birth)/10) +; diff --git a/inst/sql/sql_server/analyses/705.sql b/inst/sql/sql_server/analyses/705.sql new file mode 100755 index 00000000..2e8e9cda --- /dev/null +++ b/inst/sql/sql_server/analyses/705.sql @@ -0,0 +1,14 @@ +-- 705 Number of drug occurrence records, by drug_concept_id by drug_type_concept_id + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select 705 as analysis_id, + CAST(de1.drug_CONCEPT_ID AS VARCHAR(255)) as stratum_1, + CAST(de1.drug_type_concept_id AS VARCHAR(255)) as stratum_2, + null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(de1.PERSON_ID) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_705 +from + @cdmDatabaseSchema.drug_exposure de1 +group by de1.drug_CONCEPT_ID, + de1.drug_type_concept_id +; diff --git a/inst/sql/sql_server/analyses/706.sql b/inst/sql/sql_server/analyses/706.sql new file mode 100755 index 00000000..8d43d6d7 --- /dev/null +++ b/inst/sql/sql_server/analyses/706.sql @@ -0,0 +1,75 @@ +-- 706 Distribution of age by drug_concept_id + +--HINT DISTRIBUTE_ON_KEY(subject_id) +select de1.drug_concept_id as subject_id, + p1.gender_concept_id, + de1.drug_start_year - p1.year_of_birth as count_value +INTO #rawData_706 +from @cdmDatabaseSchema.PERSON p1 +inner join +( + select person_id, drug_concept_id, min(year(drug_exposure_start_date)) as drug_start_year + from @cdmDatabaseSchema.drug_exposure + group by person_id, drug_concept_id +) de1 on p1.person_id = de1.person_id +; + +--HINT DISTRIBUTE_ON_KEY(stratum1_id) +with overallStats (stratum1_id, stratum2_id, avg_value, stdev_value, min_value, max_value, total) as +( + select subject_id as stratum1_id, + gender_concept_id as stratum2_id, + CAST(avg(1.0 * 
count_value) AS FLOAT) as avg_value, + CAST(stdev(count_value) AS FLOAT) as stdev_value, + min(count_value) as min_value, + max(count_value) as max_value, + count_big(*) as total + FROM #rawData_706 + group by subject_id, gender_concept_id +), +statsView (stratum1_id, stratum2_id, count_value, total, rn) as +( + select subject_id as stratum1_id, gender_concept_id as stratum2_id, count_value, count_big(*) as total, row_number() over (partition by subject_id, gender_concept_id order by count_value) as rn + FROM #rawData_706 + group by subject_id, gender_concept_id, count_value +), +priorStats (stratum1_id, stratum2_id, count_value, total, accumulated) as +( + select s.stratum1_id, s.stratum2_id, s.count_value, s.total, sum(p.total) as accumulated + from statsView s + join statsView p on s.stratum1_id = p.stratum1_id and s.stratum2_id = p.stratum2_id and p.rn <= s.rn + group by s.stratum1_id, s.stratum2_id, s.count_value, s.total, s.rn +) +select 706 as analysis_id, + CAST(o.stratum1_id AS VARCHAR(255)) AS stratum1_id, + CAST(o.stratum2_id AS VARCHAR(255)) AS stratum2_id, + o.total as count_value, + o.min_value, + o.max_value, + o.avg_value, + o.stdev_value, + MIN(case when p.accumulated >= .50 * o.total then count_value else o.max_value end) as median_value, + MIN(case when p.accumulated >= .10 * o.total then count_value else o.max_value end) as p10_value, + MIN(case when p.accumulated >= .25 * o.total then count_value else o.max_value end) as p25_value, + MIN(case when p.accumulated >= .75 * o.total then count_value else o.max_value end) as p75_value, + MIN(case when p.accumulated >= .90 * o.total then count_value else o.max_value end) as p90_value +into #tempResults +from priorStats p +join overallStats o on p.stratum1_id = o.stratum1_id and p.stratum2_id = o.stratum2_id +GROUP BY o.stratum1_id, o.stratum2_id, o.total, o.min_value, o.max_value, o.avg_value, o.stdev_value +; + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select analysis_id, stratum1_id as stratum_1, stratum2_id as stratum_2, +null as stratum_3, null as stratum_4, null as stratum_5, +count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_dist_706 +from #tempResults +; + + +truncate table #rawData_706; +drop table #rawData_706; + +truncate table #tempResults; +drop table #tempResults; diff --git a/inst/sql/sql_server/analyses/709.sql b/inst/sql/sql_server/analyses/709.sql new file mode 100755 index 00000000..1e8a769b --- /dev/null +++ b/inst/sql/sql_server/analyses/709.sql @@ -0,0 +1,13 @@ +-- 709 Number of drug exposure records with invalid person_id + + +select 709 as analysis_id, + null as stratum_1, null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(de1.PERSON_ID) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_709 +from + @cdmDatabaseSchema.drug_exposure de1 + left join @cdmDatabaseSchema.PERSON p1 + on p1.person_id = de1.person_id +where p1.person_id is null +; diff --git a/inst/sql/sql_server/analyses/710.sql b/inst/sql/sql_server/analyses/710.sql new file mode 100755 index 00000000..ac0b36cb --- /dev/null +++ b/inst/sql/sql_server/analyses/710.sql @@ -0,0 +1,15 @@ +-- 710 Number of drug exposure records outside valid observation period + + +select 710 as analysis_id, + null as stratum_1, null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(de1.PERSON_ID) as count_value +into 
@scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_710 +from + @cdmDatabaseSchema.drug_exposure de1 + left join @cdmDatabaseSchema.observation_period op1 + on op1.person_id = de1.person_id + and de1.drug_exposure_start_date >= op1.observation_period_start_date + and de1.drug_exposure_start_date <= op1.observation_period_end_date +where op1.person_id is null +; diff --git a/inst/sql/sql_server/analyses/711.sql b/inst/sql/sql_server/analyses/711.sql new file mode 100755 index 00000000..46ca461c --- /dev/null +++ b/inst/sql/sql_server/analyses/711.sql @@ -0,0 +1,11 @@ +-- 711 Number of drug exposure records with end date < start date + + +select 711 as analysis_id, + null as stratum_1, null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(de1.PERSON_ID) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_711 +from + @cdmDatabaseSchema.drug_exposure de1 +where de1.drug_exposure_end_date < de1.drug_exposure_start_date +; diff --git a/inst/sql/sql_server/analyses/712.sql b/inst/sql/sql_server/analyses/712.sql new file mode 100755 index 00000000..8e510cba --- /dev/null +++ b/inst/sql/sql_server/analyses/712.sql @@ -0,0 +1,14 @@ +-- 712 Number of drug exposure records with invalid provider_id + + +select 712 as analysis_id, + null as stratum_1, null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(de1.PERSON_ID) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_712 +from + @cdmDatabaseSchema.drug_exposure de1 + left join @cdmDatabaseSchema.provider p1 + on p1.provider_id = de1.provider_id +where de1.provider_id is not null + and p1.provider_id is null +; diff --git a/inst/sql/sql_server/analyses/713.sql b/inst/sql/sql_server/analyses/713.sql new file mode 100755 index 00000000..63ff99e3 --- /dev/null +++ b/inst/sql/sql_server/analyses/713.sql @@ -0,0 +1,14 @@ +-- 713 Number of drug exposure records with invalid visit_id + + +select 713 as analysis_id, + null as stratum_1, null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(de1.PERSON_ID) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_713 +from + @cdmDatabaseSchema.drug_exposure de1 + left join @cdmDatabaseSchema.visit_occurrence vo1 + on de1.visit_occurrence_id = vo1.visit_occurrence_id +where de1.visit_occurrence_id is not null + and vo1.visit_occurrence_id is null +; diff --git a/inst/sql/sql_server/analyses/715.sql b/inst/sql/sql_server/analyses/715.sql new file mode 100755 index 00000000..8ca22848 --- /dev/null +++ b/inst/sql/sql_server/analyses/715.sql @@ -0,0 +1,62 @@ +-- 715 Distribution of days_supply by drug_concept_id + +--HINT DISTRIBUTE_ON_KEY(stratum_id) +with rawData(stratum_id, count_value) as +( + select drug_concept_id, + days_supply as count_value + from @cdmDatabaseSchema.drug_exposure + where days_supply is not null +), +overallStats (stratum_id, avg_value, stdev_value, min_value, max_value, total) as +( + select stratum_id, + CAST(avg(1.0 * count_value) AS FLOAT) as avg_value, + CAST(stdev(count_value) AS FLOAT) as stdev_value, + min(count_value) as min_value, + max(count_value) as max_value, + count_big(*) as total + FROM rawData + group by stratum_id +), +statsView (stratum_id, count_value, total, rn) as +( + select stratum_id, count_value, count_big(*) as total, row_number() over (partition by stratum_id order by count_value) as rn + FROM rawData + group by stratum_id, count_value +), +priorStats (stratum_id, count_value, total, accumulated) as 
+( + select s.stratum_id, s.count_value, s.total, sum(p.total) as accumulated + from statsView s + join statsView p on s.stratum_id = p.stratum_id and p.rn <= s.rn + group by s.stratum_id, s.count_value, s.total, s.rn +) +select 715 as analysis_id, + CAST(o.stratum_id AS VARCHAR(255)) AS stratum_id, + o.total as count_value, + o.min_value, + o.max_value, + o.avg_value, + o.stdev_value, + MIN(case when p.accumulated >= .50 * o.total then count_value else o.max_value end) as median_value, + MIN(case when p.accumulated >= .10 * o.total then count_value else o.max_value end) as p10_value, + MIN(case when p.accumulated >= .25 * o.total then count_value else o.max_value end) as p25_value, + MIN(case when p.accumulated >= .75 * o.total then count_value else o.max_value end) as p75_value, + MIN(case when p.accumulated >= .90 * o.total then count_value else o.max_value end) as p90_value +into #tempResults +from priorStats p +join overallStats o on p.stratum_id = o.stratum_id +GROUP BY o.stratum_id, o.total, o.min_value, o.max_value, o.avg_value, o.stdev_value +; + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select analysis_id, stratum_id as stratum_1, +null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, +count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_dist_715 +from #tempResults +; + +truncate table #tempResults; +drop table #tempResults; diff --git a/inst/sql/sql_server/analyses/716.sql b/inst/sql/sql_server/analyses/716.sql new file mode 100755 index 00000000..745770d2 --- /dev/null +++ b/inst/sql/sql_server/analyses/716.sql @@ -0,0 +1,62 @@ +-- 716 Distribution of refills by drug_concept_id + +--HINT DISTRIBUTE_ON_KEY(stratum_id) +with rawData(stratum_id, count_value) as +( + select drug_concept_id, + refills as count_value + from @cdmDatabaseSchema.drug_exposure + where refills is not null +), +overallStats (stratum_id, avg_value, stdev_value, min_value, max_value, total) as +( + select stratum_id, + CAST(avg(1.0 * count_value) AS FLOAT) as avg_value, + CAST(stdev(count_value) AS FLOAT) as stdev_value, + min(count_value) as min_value, + max(count_value) as max_value, + count_big(*) as total + FROM rawData + group by stratum_id +), +statsView (stratum_id, count_value, total, rn) as +( + select stratum_id, count_value, count_big(*) as total, row_number() over (partition by stratum_id order by count_value) as rn + FROM rawData + group by stratum_id, count_value +), +priorStats (stratum_id, count_value, total, accumulated) as +( + select s.stratum_id, s.count_value, s.total, sum(p.total) as accumulated + from statsView s + join statsView p on s.stratum_id = p.stratum_id and p.rn <= s.rn + group by s.stratum_id, s.count_value, s.total, s.rn +) +select 716 as analysis_id, + CAST(o.stratum_id AS VARCHAR(255)) AS stratum_id, + o.total as count_value, + o.min_value, + o.max_value, + o.avg_value, + o.stdev_value, + MIN(case when p.accumulated >= .50 * o.total then count_value else o.max_value end) as median_value, + MIN(case when p.accumulated >= .10 * o.total then count_value else o.max_value end) as p10_value, + MIN(case when p.accumulated >= .25 * o.total then count_value else o.max_value end) as p25_value, + MIN(case when p.accumulated >= .75 * o.total then count_value else o.max_value end) as p75_value, + MIN(case when p.accumulated >= .90 * o.total then count_value else o.max_value end) as p90_value +into #tempResults +from priorStats p +join overallStats 
o on p.stratum_id = o.stratum_id +GROUP BY o.stratum_id, o.total, o.min_value, o.max_value, o.avg_value, o.stdev_value +; + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select analysis_id, stratum_id as stratum_1, +null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, +count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_dist_716 +from #tempResults +; + +truncate table #tempResults; +drop table #tempResults; diff --git a/inst/sql/sql_server/analyses/717.sql b/inst/sql/sql_server/analyses/717.sql new file mode 100755 index 00000000..c52eb0c3 --- /dev/null +++ b/inst/sql/sql_server/analyses/717.sql @@ -0,0 +1,62 @@ +-- 717 Distribution of quantity by drug_concept_id + +--HINT DISTRIBUTE_ON_KEY(stratum_id) +with rawData(stratum_id, count_value) as +( + select drug_concept_id, + CAST(quantity AS FLOAT) as count_value + from @cdmDatabaseSchema.drug_exposure + where quantity is not null +), +overallStats (stratum_id, avg_value, stdev_value, min_value, max_value, total) as +( + select stratum_id, + CAST(avg(1.0 * count_value) AS FLOAT) as avg_value, + CAST(stdev(count_value) AS FLOAT) as stdev_value, + min(count_value) as min_value, + max(count_value) as max_value, + count_big(*) as total + FROM rawData + group by stratum_id +), +statsView (stratum_id, count_value, total, rn) as +( + select stratum_id, count_value, count_big(*) as total, row_number() over (order by count_value) as rn + FROM rawData + group by stratum_id, count_value +), +priorStats (stratum_id, count_value, total, accumulated) as +( + select s.stratum_id, s.count_value, s.total, sum(p.total) as accumulated + from statsView s + join statsView p on s.stratum_id = p.stratum_id and p.rn <= s.rn + group by s.stratum_id, s.count_value, s.total, s.rn +) +select 717 as analysis_id, + CAST(o.stratum_id AS VARCHAR(255)) AS stratum_id, + o.total as count_value, + o.min_value, + o.max_value, + o.avg_value, + o.stdev_value, + MIN(case when p.accumulated >= .50 * o.total then count_value else o.max_value end) as median_value, + MIN(case when p.accumulated >= .10 * o.total then count_value else o.max_value end) as p10_value, + MIN(case when p.accumulated >= .25 * o.total then count_value else o.max_value end) as p25_value, + MIN(case when p.accumulated >= .75 * o.total then count_value else o.max_value end) as p75_value, + MIN(case when p.accumulated >= .90 * o.total then count_value else o.max_value end) as p90_value +into #tempResults +from priorStats p +join overallStats o on p.stratum_id = o.stratum_id +GROUP BY o.stratum_id, o.total, o.min_value, o.max_value, o.avg_value, o.stdev_value +; + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select analysis_id, stratum_id as stratum_1, +null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, +count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_dist_717 +from #tempResults +; + +truncate table #tempResults; +drop table #tempResults; diff --git a/inst/sql/sql_server/analyses/720.sql b/inst/sql/sql_server/analyses/720.sql new file mode 100755 index 00000000..861e9db0 --- /dev/null +++ b/inst/sql/sql_server/analyses/720.sql @@ -0,0 +1,12 @@ +-- 720 Number of drug exposure records by drug exposure start month + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select 720 as analysis_id, + CAST(YEAR(drug_exposure_start_date)*100 + 
month(drug_exposure_start_date) AS VARCHAR(255)) as stratum_1, + null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(PERSON_ID) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_720 +from +@cdmDatabaseSchema.drug_exposure de1 +group by YEAR(drug_exposure_start_date)*100 + month(drug_exposure_start_date) +; diff --git a/inst/sql/sql_server/analyses/791.sql b/inst/sql/sql_server/analyses/791.sql new file mode 100755 index 00000000..315bc989 --- /dev/null +++ b/inst/sql/sql_server/analyses/791.sql @@ -0,0 +1,22 @@ +-- 791 Number of total persons that have at least x drug exposures + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select 791 as analysis_id, + CAST(drug_concept_id as varchar(255)) as stratum_1, + CAST(drg_cnt as varchar(255)) as stratum_2, + null as stratum_3, + null as stratum_4, + null as stratum_5, + sum(count(person_id)) over (partition by drug_concept_id order by drg_cnt desc) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_791 +from +( + select + d.drug_concept_id, + count(d.drug_exposure_id) as drg_cnt, + d.person_id + from @cdmDatabaseSchema.drug_exposure d + group by d.person_id, d.drug_concept_id +) cnt_q +group by cnt_q.drug_concept_id, cnt_q.drg_cnt +; diff --git a/inst/sql/sql_server/analyses/8.sql b/inst/sql/sql_server/analyses/8.sql new file mode 100755 index 00000000..fdce7956 --- /dev/null +++ b/inst/sql/sql_server/analyses/8.sql @@ -0,0 +1,12 @@ +-- 8 Number of persons with invalid location_id + +select 8 as analysis_id, +null as stratum_1, null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, +COUNT_BIG(p1.person_id) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_8 +from @cdmDatabaseSchema.PERSON p1 + left join @cdmDatabaseSchema.location l1 + on p1.location_id = l1.location_id +where p1.location_id is not null + and l1.location_id is null +; diff --git a/inst/sql/sql_server/analyses/800.sql b/inst/sql/sql_server/analyses/800.sql new file mode 100755 index 00000000..9ba71be5 --- /dev/null +++ b/inst/sql/sql_server/analyses/800.sql @@ -0,0 +1,12 @@ +-- 800 Number of persons with at least one observation occurrence, by observation_concept_id + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select 800 as analysis_id, + CAST(o1.observation_CONCEPT_ID AS VARCHAR(255)) as stratum_1, + null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(distinct o1.PERSON_ID) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_800 +from + @cdmDatabaseSchema.observation o1 +group by o1.observation_CONCEPT_ID +; diff --git a/inst/sql/sql_server/analyses/801.sql b/inst/sql/sql_server/analyses/801.sql new file mode 100755 index 00000000..912b3f59 --- /dev/null +++ b/inst/sql/sql_server/analyses/801.sql @@ -0,0 +1,12 @@ +-- 801 Number of observation occurrence records, by observation_concept_id + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select 801 as analysis_id, + CAST(o1.observation_CONCEPT_ID AS VARCHAR(255)) as stratum_1, + null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(o1.PERSON_ID) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_801 +from + @cdmDatabaseSchema.observation o1 +group by o1.observation_CONCEPT_ID +; diff --git a/inst/sql/sql_server/analyses/802.sql b/inst/sql/sql_server/analyses/802.sql new file mode 100755 index 00000000..8ca883dc --- /dev/null +++ b/inst/sql/sql_server/analyses/802.sql @@ -0,0 +1,14 @@ +-- 802 Number of persons 
by observation occurrence start month, by observation_concept_id + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select 802 as analysis_id, + CAST(o1.observation_concept_id AS VARCHAR(255)) as stratum_1, + CAST(YEAR(observation_date)*100 + month(observation_date) AS VARCHAR(255)) as stratum_2, + null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(distinct PERSON_ID) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_802 +from +@cdmDatabaseSchema.observation o1 +group by o1.observation_concept_id, + YEAR(observation_date)*100 + month(observation_date) +; diff --git a/inst/sql/sql_server/analyses/803.sql b/inst/sql/sql_server/analyses/803.sql new file mode 100755 index 00000000..86f8e536 --- /dev/null +++ b/inst/sql/sql_server/analyses/803.sql @@ -0,0 +1,66 @@ +-- 803 Number of distinct observation occurrence concepts per person + +--HINT DISTRIBUTE_ON_KEY(count_value) +with rawData(count_value) as +( + select num_observations as count_value + from + ( + select o1.person_id, COUNT_BIG(distinct o1.observation_concept_id) as num_observations + from + @cdmDatabaseSchema.observation o1 + group by o1.person_id + ) t0 +), +overallStats (avg_value, stdev_value, min_value, max_value, total) as +( + select CAST(avg(1.0 * count_value) AS FLOAT) as avg_value, + CAST(stdev(count_value) AS FLOAT) as stdev_value, + min(count_value) as min_value, + max(count_value) as max_value, + count_big(*) as total + from rawData +), +statsView (count_value, total, rn) as +( + select count_value, + count_big(*) as total, + row_number() over (order by count_value) as rn + FROM rawData + group by count_value +), +priorStats (count_value, total, accumulated) as +( + select s.count_value, s.total, sum(p.total) as accumulated + from statsView s + join statsView p on p.rn <= s.rn + group by s.count_value, s.total, s.rn +) +select 803 as analysis_id, + o.total as count_value, + o.min_value, + o.max_value, + o.avg_value, + o.stdev_value, + MIN(case when p.accumulated >= .50 * o.total then count_value else o.max_value end) as median_value, + MIN(case when p.accumulated >= .10 * o.total then count_value else o.max_value end) as p10_value, + MIN(case when p.accumulated >= .25 * o.total then count_value else o.max_value end) as p25_value, + MIN(case when p.accumulated >= .75 * o.total then count_value else o.max_value end) as p75_value, + MIN(case when p.accumulated >= .90 * o.total then count_value else o.max_value end) as p90_value +into #tempResults +from priorStats p +CROSS JOIN overallStats o +GROUP BY o.total, o.min_value, o.max_value, o.avg_value, o.stdev_value +; + +--HINT DISTRIBUTE_ON_KEY(count_value) +select analysis_id, +null as stratum_1, null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, +count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_dist_803 +from #tempResults +; + +truncate table #tempResults; + +drop table #tempResults; diff --git a/inst/sql/sql_server/analyses/804.sql b/inst/sql/sql_server/analyses/804.sql new file mode 100755 index 00000000..5d05b30a --- /dev/null +++ b/inst/sql/sql_server/analyses/804.sql @@ -0,0 +1,20 @@ +-- 804 Number of persons with at least one observation occurrence, by observation_concept_id by calendar year by gender by age decile + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select 804 as analysis_id, + CAST(o1.observation_concept_id AS VARCHAR(255)) as stratum_1, + CAST(YEAR(observation_date) AS 
VARCHAR(255)) as stratum_2, + CAST(p1.gender_concept_id AS VARCHAR(255)) as stratum_3, + CAST(floor((year(observation_date) - p1.year_of_birth)/10) AS VARCHAR(255)) as stratum_4, + null as stratum_5, + COUNT_BIG(distinct p1.PERSON_ID) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_804 +from @cdmDatabaseSchema.PERSON p1 +inner join +@cdmDatabaseSchema.observation o1 +on p1.person_id = o1.person_id +group by o1.observation_concept_id, + YEAR(observation_date), + p1.gender_concept_id, + floor((year(observation_date) - p1.year_of_birth)/10) +; diff --git a/inst/sql/sql_server/analyses/805.sql b/inst/sql/sql_server/analyses/805.sql new file mode 100755 index 00000000..221354ca --- /dev/null +++ b/inst/sql/sql_server/analyses/805.sql @@ -0,0 +1,14 @@ +-- 805 Number of observation occurrence records, by observation_concept_id by observation_type_concept_id + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select 805 as analysis_id, + CAST(o1.observation_CONCEPT_ID AS VARCHAR(255)) as stratum_1, + CAST(o1.observation_type_concept_id AS VARCHAR(255)) as stratum_2, + null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(o1.PERSON_ID) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_805 +from + @cdmDatabaseSchema.observation o1 +group by o1.observation_CONCEPT_ID, + o1.observation_type_concept_id +; diff --git a/inst/sql/sql_server/analyses/806.sql b/inst/sql/sql_server/analyses/806.sql new file mode 100755 index 00000000..f978aef7 --- /dev/null +++ b/inst/sql/sql_server/analyses/806.sql @@ -0,0 +1,76 @@ +-- 806 Distribution of age by observation_concept_id + +--HINT DISTRIBUTE_ON_KEY(subject_id) +select o1.observation_concept_id as subject_id, + p1.gender_concept_id, + o1.observation_start_year - p1.year_of_birth as count_value +INTO #rawData_806 +from @cdmDatabaseSchema.PERSON p1 +inner join +( + select person_id, observation_concept_id, min(year(observation_date)) as observation_start_year + from @cdmDatabaseSchema.observation + group by person_id, observation_concept_id +) o1 +on p1.person_id = o1.person_id +; + + +--HINT DISTRIBUTE_ON_KEY(stratum1_id) +with overallStats (stratum1_id, stratum2_id, avg_value, stdev_value, min_value, max_value, total) as +( + select subject_id as stratum1_id, + gender_concept_id as stratum2_id, + CAST(avg(1.0 * count_value) AS FLOAT) as avg_value, + CAST(stdev(count_value) AS FLOAT) as stdev_value, + min(count_value) as min_value, + max(count_value) as max_value, + count_big(*) as total + FROM #rawData_806 + group by subject_id, gender_concept_id +), +statsView (stratum1_id, stratum2_id, count_value, total, rn) as +( + select subject_id as stratum1_id, gender_concept_id as stratum2_id, count_value, count_big(*) as total, row_number() over (partition by subject_id, gender_concept_id order by count_value) as rn + FROM #rawData_806 + group by subject_id, gender_concept_id, count_value +), +priorStats (stratum1_id, stratum2_id, count_value, total, accumulated) as +( + select s.stratum1_id, s.stratum2_id, s.count_value, s.total, sum(p.total) as accumulated + from statsView s + join statsView p on s.stratum1_id = p.stratum1_id and s.stratum2_id = p.stratum2_id and p.rn <= s.rn + group by s.stratum1_id, s.stratum2_id, s.count_value, s.total, s.rn +) +select 806 as analysis_id, + CAST(o.stratum1_id AS VARCHAR(255)) AS stratum1_id, + CAST(o.stratum2_id AS VARCHAR(255)) AS stratum2_id, + o.total as count_value, + o.min_value, + o.max_value, + o.avg_value, + o.stdev_value, + MIN(case when p.accumulated >= .50 * 
o.total then count_value else o.max_value end) as median_value, + MIN(case when p.accumulated >= .10 * o.total then count_value else o.max_value end) as p10_value, + MIN(case when p.accumulated >= .25 * o.total then count_value else o.max_value end) as p25_value, + MIN(case when p.accumulated >= .75 * o.total then count_value else o.max_value end) as p75_value, + MIN(case when p.accumulated >= .90 * o.total then count_value else o.max_value end) as p90_value +into #tempResults +from priorStats p +join overallStats o on p.stratum1_id = o.stratum1_id and p.stratum2_id = o.stratum2_id +GROUP BY o.stratum1_id, o.stratum2_id, o.total, o.min_value, o.max_value, o.avg_value, o.stdev_value +; + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select analysis_id, stratum1_id as stratum_1, stratum2_id as stratum_2, +null as stratum_3, null as stratum_4, null as stratum_5, +count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_dist_806 +from #tempResults +; + +truncate table #rawData_806; +drop table #rawData_806; + +truncate table #tempResults; +drop table #tempResults; diff --git a/inst/sql/sql_server/analyses/807.sql b/inst/sql/sql_server/analyses/807.sql new file mode 100755 index 00000000..d94baa18 --- /dev/null +++ b/inst/sql/sql_server/analyses/807.sql @@ -0,0 +1,14 @@ +-- 807 Number of observation occurrence records, by observation_concept_id and unit_concept_id + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select 807 as analysis_id, + CAST(o1.observation_CONCEPT_ID AS VARCHAR(255)) as stratum_1, + CAST(o1.unit_concept_id AS VARCHAR(255)) as stratum_2, + null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(o1.PERSON_ID) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_807 +from + @cdmDatabaseSchema.observation o1 +group by o1.observation_CONCEPT_ID, + o1.unit_concept_id +; diff --git a/inst/sql/sql_server/analyses/809.sql b/inst/sql/sql_server/analyses/809.sql new file mode 100755 index 00000000..34d20609 --- /dev/null +++ b/inst/sql/sql_server/analyses/809.sql @@ -0,0 +1,13 @@ +-- 809 Number of observation records with invalid person_id + + +select 809 as analysis_id, + null as stratum_1, null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(o1.PERSON_ID) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_809 +from + @cdmDatabaseSchema.observation o1 + left join @cdmDatabaseSchema.PERSON p1 + on p1.person_id = o1.person_id +where p1.person_id is null +; diff --git a/inst/sql/sql_server/analyses/810.sql b/inst/sql/sql_server/analyses/810.sql new file mode 100755 index 00000000..1ec2749c --- /dev/null +++ b/inst/sql/sql_server/analyses/810.sql @@ -0,0 +1,15 @@ +-- 810 Number of observation records outside valid observation period + + +select 810 as analysis_id, + null as stratum_1, null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(o1.PERSON_ID) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_810 +from + @cdmDatabaseSchema.observation o1 + left join @cdmDatabaseSchema.observation_period op1 + on op1.person_id = o1.person_id + and o1.observation_date >= op1.observation_period_start_date + and o1.observation_date <= op1.observation_period_end_date +where op1.person_id is null +; diff --git a/inst/sql/sql_server/analyses/812.sql b/inst/sql/sql_server/analyses/812.sql new file mode 100755 index 00000000..8d85b06b --- /dev/null +++ 
b/inst/sql/sql_server/analyses/812.sql @@ -0,0 +1,14 @@ +-- 812 Number of observation records with invalid provider_id + + +select 812 as analysis_id, + null as stratum_1, null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(o1.PERSON_ID) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_812 +from + @cdmDatabaseSchema.observation o1 + left join @cdmDatabaseSchema.provider p1 + on p1.provider_id = o1.provider_id +where o1.provider_id is not null + and p1.provider_id is null +; diff --git a/inst/sql/sql_server/analyses/813.sql b/inst/sql/sql_server/analyses/813.sql new file mode 100755 index 00000000..9271b795 --- /dev/null +++ b/inst/sql/sql_server/analyses/813.sql @@ -0,0 +1,14 @@ +-- 813 Number of observation records with invalid visit_id + + +select 813 as analysis_id, + null as stratum_1, null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(o1.PERSON_ID) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_813 +from + @cdmDatabaseSchema.observation o1 + left join @cdmDatabaseSchema.visit_occurrence vo1 + on o1.visit_occurrence_id = vo1.visit_occurrence_id +where o1.visit_occurrence_id is not null + and vo1.visit_occurrence_id is null +; diff --git a/inst/sql/sql_server/analyses/814.sql b/inst/sql/sql_server/analyses/814.sql new file mode 100755 index 00000000..aced3192 --- /dev/null +++ b/inst/sql/sql_server/analyses/814.sql @@ -0,0 +1,13 @@ +-- 814 Number of observation records with no value (numeric, string, or concept) + + +select 814 as analysis_id, + null as stratum_1, null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(o1.PERSON_ID) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_814 +from + @cdmDatabaseSchema.observation o1 +where o1.value_as_number is null + and o1.value_as_string is null + and o1.value_as_concept_id is null +; diff --git a/inst/sql/sql_server/analyses/815.sql b/inst/sql/sql_server/analyses/815.sql new file mode 100755 index 00000000..5e367468 --- /dev/null +++ b/inst/sql/sql_server/analyses/815.sql @@ -0,0 +1,80 @@ + +--HINT DISTRIBUTE_ON_KEY(stratum1_id) +select subject_id as stratum1_id, + unit_concept_id as stratum2_id, + CAST(avg(1.0 * count_value) AS FLOAT) as avg_value, + CAST(stdev(count_value) AS FLOAT) as stdev_value, + min(count_value) as min_value, + max(count_value) as max_value, + count_big(*) as total + into #overallStats + FROM + ( + select observation_concept_id as subject_id, + unit_concept_id, + CAST(value_as_number AS FLOAT) as count_value + from @cdmDatabaseSchema.observation o1 + where o1.unit_concept_id is not null + and o1.value_as_number is not null + ) A + group by subject_id, unit_concept_id +; + +--HINT DISTRIBUTE_ON_KEY(stratum1_id) +select subject_id as stratum1_id, unit_concept_id as stratum2_id, count_value, count_big(*) as total, row_number() over (partition by subject_id, unit_concept_id order by count_value) as rn +into #statsView +FROM +( + select observation_concept_id as subject_id, + unit_concept_id, + CAST(value_as_number AS FLOAT) as count_value + from @cdmDatabaseSchema.observation o1 + where o1.unit_concept_id is not null + and o1.value_as_number is not null +) A +group by subject_id, unit_concept_id, count_value +; + +--HINT DISTRIBUTE_ON_KEY(stratum1_id) +with priorStats (stratum1_id, stratum2_id, count_value, total, accumulated) as +( + select s.stratum1_id, s.stratum2_id, s.count_value, s.total, sum(p.total) as accumulated + from 
#statsView s + join #statsView p on s.stratum1_id = p.stratum1_id and s.stratum2_id = p.stratum2_id and p.rn <= s.rn + group by s.stratum1_id, s.stratum2_id, s.count_value, s.total, s.rn +) +select 815 as analysis_id, + CAST(o.stratum1_id AS VARCHAR(255)) AS stratum1_id, + CAST(o.stratum2_id AS VARCHAR(255)) AS stratum2_id, + o.total as count_value, + o.min_value, + o.max_value, + o.avg_value, + o.stdev_value, + MIN(case when p.accumulated >= .50 * o.total then count_value else o.max_value end) as median_value, + MIN(case when p.accumulated >= .10 * o.total then count_value else o.max_value end) as p10_value, + MIN(case when p.accumulated >= .25 * o.total then count_value else o.max_value end) as p25_value, + MIN(case when p.accumulated >= .75 * o.total then count_value else o.max_value end) as p75_value, + MIN(case when p.accumulated >= .90 * o.total then count_value else o.max_value end) as p90_value +into #tempResults +from priorStats p +join #overallStats o on p.stratum1_id = o.stratum1_id and p.stratum2_id = o.stratum2_id +GROUP BY o.stratum1_id, o.stratum2_id, o.total, o.min_value, o.max_value, o.avg_value, o.stdev_value +; + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select analysis_id, stratum1_id as stratum_1, stratum2_id as stratum_2, +null as stratum_3, null as stratum_4, null as stratum_5, +count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_dist_815 +from #tempResults +; + +truncate table #overallStats; +drop table #overallStats; + +truncate table #statsView; +drop table #statsView; + +truncate table #tempResults; +drop table #tempResults; diff --git a/inst/sql/sql_server/analyses/820.sql b/inst/sql/sql_server/analyses/820.sql new file mode 100755 index 00000000..6a488492 --- /dev/null +++ b/inst/sql/sql_server/analyses/820.sql @@ -0,0 +1,12 @@ +-- 820 Number of observation records by observation month + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select 820 as analysis_id, + CAST(YEAR(observation_date)*100 + month(observation_date) AS VARCHAR(255)) as stratum_1, + null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(PERSON_ID) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_820 +from +@cdmDatabaseSchema.observation o1 +group by YEAR(observation_date)*100 + month(observation_date) +; diff --git a/inst/sql/sql_server/analyses/891.sql b/inst/sql/sql_server/analyses/891.sql new file mode 100755 index 00000000..7271bfd6 --- /dev/null +++ b/inst/sql/sql_server/analyses/891.sql @@ -0,0 +1,22 @@ +-- 891 Number of total persons that have at least x observations + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select 891 as analysis_id, + CAST(observation_concept_id as varchar(255)) as stratum_1, + CAST(obs_cnt as varchar(255)) as stratum_2, + null as stratum_3, + null as stratum_4, + null as stratum_5, + sum(count(person_id)) over (partition by observation_concept_id order by obs_cnt desc) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_891 +from +( + select + o.observation_concept_id, + count(o.observation_id) as obs_cnt, + o.person_id + from @cdmDatabaseSchema.observation o + group by o.person_id, o.observation_concept_id +) cnt_q +group by cnt_q.observation_concept_id, cnt_q.obs_cnt +; diff --git a/inst/sql/sql_server/analyses/9.sql b/inst/sql/sql_server/analyses/9.sql new file mode 100755 index 00000000..3770b9a5 --- /dev/null +++ b/inst/sql/sql_server/analyses/9.sql @@ -0,0
+1,12 @@ +-- 9 Number of persons with invalid care_site_id + +select 9 as analysis_id, +null as stratum_1, null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, +COUNT_BIG(p1.person_id) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_9 +from @cdmDatabaseSchema.PERSON p1 + left join @cdmDatabaseSchema.care_site cs1 + on p1.care_site_id = cs1.care_site_id +where p1.care_site_id is not null + and cs1.care_site_id is null +; diff --git a/inst/sql/sql_server/analyses/900.sql b/inst/sql/sql_server/analyses/900.sql new file mode 100755 index 00000000..00297211 --- /dev/null +++ b/inst/sql/sql_server/analyses/900.sql @@ -0,0 +1,12 @@ +-- 900 Number of persons with at least one drug era, by drug_concept_id + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select 900 as analysis_id, + CAST(de1.drug_CONCEPT_ID AS VARCHAR(255)) as stratum_1, + null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(distinct de1.PERSON_ID) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_900 +from + @cdmDatabaseSchema.drug_era de1 +group by de1.drug_CONCEPT_ID +; diff --git a/inst/sql/sql_server/analyses/901.sql b/inst/sql/sql_server/analyses/901.sql new file mode 100755 index 00000000..b342d7e7 --- /dev/null +++ b/inst/sql/sql_server/analyses/901.sql @@ -0,0 +1,12 @@ +-- 901 Number of drug era records, by drug_concept_id + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select 901 as analysis_id, + CAST(de1.drug_CONCEPT_ID AS VARCHAR(255)) as stratum_1, + null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(de1.PERSON_ID) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_901 +from + @cdmDatabaseSchema.drug_era de1 +group by de1.drug_CONCEPT_ID +; diff --git a/inst/sql/sql_server/analyses/902.sql b/inst/sql/sql_server/analyses/902.sql new file mode 100755 index 00000000..89017821 --- /dev/null +++ b/inst/sql/sql_server/analyses/902.sql @@ -0,0 +1,14 @@ +-- 902 Number of persons by drug era start month, by drug_concept_id + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select 902 as analysis_id, + CAST(de1.drug_concept_id AS VARCHAR(255)) as stratum_1, + CAST(YEAR(drug_era_start_date)*100 + month(drug_era_start_date) AS VARCHAR(255)) as stratum_2, + null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(distinct PERSON_ID) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_902 +from +@cdmDatabaseSchema.drug_era de1 +group by de1.drug_concept_id, + YEAR(drug_era_start_date)*100 + month(drug_era_start_date) +; diff --git a/inst/sql/sql_server/analyses/903.sql b/inst/sql/sql_server/analyses/903.sql new file mode 100755 index 00000000..b1e7888b --- /dev/null +++ b/inst/sql/sql_server/analyses/903.sql @@ -0,0 +1,60 @@ +-- 903 Number of distinct drug era concepts per person + +--HINT DISTRIBUTE_ON_KEY(count_value) +with rawData(count_value) as +( + select COUNT_BIG(distinct de1.drug_concept_id) as count_value + from @cdmDatabaseSchema.drug_era de1 + group by de1.person_id +), +overallStats (avg_value, stdev_value, min_value, max_value, total) as +( + select CAST(avg(1.0 * count_value) AS FLOAT) as avg_value, + CAST(stdev(count_value) AS FLOAT) as stdev_value, + min(count_value) as min_value, + max(count_value) as max_value, + count_big(*) as total + from rawData +), +statsView (count_value, total, rn) as +( + select count_value, + count_big(*) as total, + row_number() over (order by count_value) as rn + FROM rawData + group by
count_value +), +priorStats (count_value, total, accumulated) as +( + select s.count_value, s.total, sum(p.total) as accumulated + from statsView s + join statsView p on p.rn <= s.rn + group by s.count_value, s.total, s.rn +) +select 903 as analysis_id, + o.total as count_value, + o.min_value, + o.max_value, + o.avg_value, + o.stdev_value, + MIN(case when p.accumulated >= .50 * o.total then count_value else o.max_value end) as median_value, + MIN(case when p.accumulated >= .10 * o.total then count_value else o.max_value end) as p10_value, + MIN(case when p.accumulated >= .25 * o.total then count_value else o.max_value end) as p25_value, + MIN(case when p.accumulated >= .75 * o.total then count_value else o.max_value end) as p75_value, + MIN(case when p.accumulated >= .90 * o.total then count_value else o.max_value end) as p90_value +into #tempResults +from priorStats p +CROSS JOIN overallStats o +GROUP BY o.total, o.min_value, o.max_value, o.avg_value, o.stdev_value +; + +--HINT DISTRIBUTE_ON_KEY(count_value) +select analysis_id, +null as stratum_1, null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, +count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_dist_903 +from #tempResults +; + +truncate table #tempResults; +drop table #tempResults; diff --git a/inst/sql/sql_server/analyses/904.sql b/inst/sql/sql_server/analyses/904.sql new file mode 100755 index 00000000..d7685488 --- /dev/null +++ b/inst/sql/sql_server/analyses/904.sql @@ -0,0 +1,20 @@ +-- 904 Number of persons with at least one drug era, by drug_concept_id by calendar year by gender by age decile + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select 904 as analysis_id, + CAST(de1.drug_concept_id AS VARCHAR(255)) as stratum_1, + CAST(YEAR(drug_era_start_date) AS VARCHAR(255)) as stratum_2, + CAST(p1.gender_concept_id AS VARCHAR(255)) as stratum_3, + CAST(floor((year(drug_era_start_date) - p1.year_of_birth)/10) AS VARCHAR(255)) as stratum_4, + null as stratum_5, + COUNT_BIG(distinct p1.PERSON_ID) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_904 +from @cdmDatabaseSchema.PERSON p1 +inner join +@cdmDatabaseSchema.drug_era de1 +on p1.person_id = de1.person_id +group by de1.drug_concept_id, + YEAR(drug_era_start_date), + p1.gender_concept_id, + floor((year(drug_era_start_date) - p1.year_of_birth)/10) +; diff --git a/inst/sql/sql_server/analyses/906.sql b/inst/sql/sql_server/analyses/906.sql new file mode 100755 index 00000000..fb602227 --- /dev/null +++ b/inst/sql/sql_server/analyses/906.sql @@ -0,0 +1,75 @@ +-- 906 Distribution of age by drug_concept_id + +--HINT DISTRIBUTE_ON_KEY(subject_id) +select de.drug_concept_id as subject_id, + p1.gender_concept_id, + de.drug_start_year - p1.year_of_birth as count_value +INTO #rawData_906 +from @cdmDatabaseSchema.PERSON p1 +inner join +( + select person_id, drug_concept_id, min(year(drug_era_start_date)) as drug_start_year + from @cdmDatabaseSchema.drug_era + group by person_id, drug_concept_id +) de on p1.person_id =de.person_id +; + +--HINT DISTRIBUTE_ON_KEY(stratum1_id) +with overallStats (stratum1_id, stratum2_id, avg_value, stdev_value, min_value, max_value, total) as +( + select subject_id as stratum1_id, + gender_concept_id as stratum2_id, + CAST(avg(1.0 * count_value) AS FLOAT) as avg_value, + CAST(stdev(count_value) AS FLOAT) as stdev_value, + min(count_value) as min_value, + max(count_value) as max_value, +
count_big(*) as total + FROM #rawData_906 + group by subject_id, gender_concept_id +), +statsView (stratum1_id, stratum2_id, count_value, total, rn) as +( + select subject_id as stratum1_id, gender_concept_id as stratum2_id, count_value, count_big(*) as total, row_number() over (partition by subject_id, gender_concept_id order by count_value) as rn + FROM #rawData_906 + group by subject_id, gender_concept_id, count_value +), +priorStats (stratum1_id, stratum2_id, count_value, total, accumulated) as +( + select s.stratum1_id, s.stratum2_id, s.count_value, s.total, sum(p.total) as accumulated + from statsView s + join statsView p on s.stratum1_id = p.stratum1_id and s.stratum2_id = p.stratum2_id and p.rn <= s.rn + group by s.stratum1_id, s.stratum2_id, s.count_value, s.total, s.rn +) +select 906 as analysis_id, + CAST(o.stratum1_id AS VARCHAR(255)) AS stratum1_id, + CAST(o.stratum2_id AS VARCHAR(255)) AS stratum2_id, + o.total as count_value, + o.min_value, + o.max_value, + o.avg_value, + o.stdev_value, + MIN(case when p.accumulated >= .50 * o.total then count_value else o.max_value end) as median_value, + MIN(case when p.accumulated >= .10 * o.total then count_value else o.max_value end) as p10_value, + MIN(case when p.accumulated >= .25 * o.total then count_value else o.max_value end) as p25_value, + MIN(case when p.accumulated >= .75 * o.total then count_value else o.max_value end) as p75_value, + MIN(case when p.accumulated >= .90 * o.total then count_value else o.max_value end) as p90_value +into #tempResults +from priorStats p +join overallStats o on p.stratum1_id = o.stratum1_id and p.stratum2_id = o.stratum2_id +GROUP BY o.stratum1_id, o.stratum2_id, o.total, o.min_value, o.max_value, o.avg_value, o.stdev_value +; + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select analysis_id, stratum1_id as stratum_1, stratum2_id as stratum_2, +null as stratum_3, null as stratum_4, null as stratum_5, +count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_dist_906 +from #tempResults +; + + +truncate table #rawData_906; +drop table #rawData_906; + +truncate table #tempResults; +drop table #tempResults; diff --git a/inst/sql/sql_server/analyses/907.sql b/inst/sql/sql_server/analyses/907.sql new file mode 100755 index 00000000..20d37f7a --- /dev/null +++ b/inst/sql/sql_server/analyses/907.sql @@ -0,0 +1,64 @@ +-- 907 Distribution of drug era length, by drug_concept_id + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +with rawData(stratum1_id, count_value) as +( + select drug_concept_id, + datediff(dd,drug_era_start_date, drug_era_end_date) as count_value + from @cdmDatabaseSchema.drug_era de1 +), +overallStats (stratum1_id, avg_value, stdev_value, min_value, max_value, total) as +( + select stratum1_id, + CAST(avg(1.0 * count_value) AS FLOAT) as avg_value, + CAST(stdev(count_value) AS FLOAT) as stdev_value, + min(count_value) as min_value, + max(count_value) as max_value, + count_big(*) as total + from rawData + group by stratum1_id +), +statsView (stratum1_id, count_value, total, rn) as +( + select stratum1_id, + count_value, + count_big(*) as total, + row_number() over (partition by stratum1_id order by count_value) as rn + FROM rawData + group by stratum1_id, count_value +), +priorStats (stratum1_id, count_value, total, accumulated) as +( + select s.stratum1_id, s.count_value, s.total, sum(p.total) as accumulated + from statsView s + join statsView p on s.stratum1_id = p.stratum1_id and p.rn <= 
s.rn + group by s.stratum1_id, s.count_value, s.total, s.rn +) +select 907 as analysis_id, + CAST(p.stratum1_id AS VARCHAR(255)) as stratum_1, + o.total as count_value, + o.min_value, + o.max_value, + o.avg_value, + o.stdev_value, + MIN(case when p.accumulated >= .50 * o.total then count_value else o.max_value end) as median_value, + MIN(case when p.accumulated >= .10 * o.total then count_value else o.max_value end) as p10_value, + MIN(case when p.accumulated >= .25 * o.total then count_value else o.max_value end) as p25_value, + MIN(case when p.accumulated >= .75 * o.total then count_value else o.max_value end) as p75_value, + MIN(case when p.accumulated >= .90 * o.total then count_value else o.max_value end) as p90_value +into #tempResults +from priorStats p +join overallStats o on p.stratum1_id = o.stratum1_id +GROUP BY p.stratum1_id, o.total, o.min_value, o.max_value, o.avg_value, o.stdev_value +; + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select analysis_id, stratum_1, +null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, +count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_dist_907 +from #tempResults +; + +truncate table #tempResults; +drop table #tempResults; diff --git a/inst/sql/sql_server/analyses/908.sql b/inst/sql/sql_server/analyses/908.sql new file mode 100755 index 00000000..1285c8eb --- /dev/null +++ b/inst/sql/sql_server/analyses/908.sql @@ -0,0 +1,13 @@ +-- 908 Number of drug eras with invalid person + + +select 908 as analysis_id, + null as stratum_1, null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(de1.PERSON_ID) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_908 +from + @cdmDatabaseSchema.drug_era de1 + left join @cdmDatabaseSchema.PERSON p1 + on p1.person_id = de1.person_id +where p1.person_id is null +; diff --git a/inst/sql/sql_server/analyses/909.sql b/inst/sql/sql_server/analyses/909.sql new file mode 100755 index 00000000..09e06f0f --- /dev/null +++ b/inst/sql/sql_server/analyses/909.sql @@ -0,0 +1,15 @@ +-- 909 Number of drug eras outside valid observation period + + +select 909 as analysis_id, + null as stratum_1, null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(de1.PERSON_ID) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_909 +from + @cdmDatabaseSchema.drug_era de1 + left join @cdmDatabaseSchema.observation_period op1 + on op1.person_id = de1.person_id + and de1.drug_era_start_date >= op1.observation_period_start_date + and de1.drug_era_start_date <= op1.observation_period_end_date +where op1.person_id is null +; diff --git a/inst/sql/sql_server/analyses/910.sql b/inst/sql/sql_server/analyses/910.sql new file mode 100755 index 00000000..71b9a52d --- /dev/null +++ b/inst/sql/sql_server/analyses/910.sql @@ -0,0 +1,11 @@ +-- 910 Number of drug eras with end date < start date + + +select 910 as analysis_id, + null as stratum_1, null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(de1.PERSON_ID) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_910 +from + @cdmDatabaseSchema.drug_era de1 +where de1.drug_era_end_date < de1.drug_era_start_date +; diff --git a/inst/sql/sql_server/analyses/920.sql b/inst/sql/sql_server/analyses/920.sql new file mode 100755 index 00000000..0a2a1b38 --- /dev/null +++ b/inst/sql/sql_server/analyses/920.sql 
@@ -0,0 +1,12 @@ +-- 920 Number of drug era records by drug era start month + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select 920 as analysis_id, + CAST(YEAR(drug_era_start_date)*100 + month(drug_era_start_date) AS VARCHAR(255)) as stratum_1, + null as stratum_2, null as stratum_3, null as stratum_4, null as stratum_5, + COUNT_BIG(PERSON_ID) as count_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_920 +from +@cdmDatabaseSchema.drug_era de1 +group by YEAR(drug_era_start_date)*100 + month(drug_era_start_date) +; diff --git a/inst/sql/sql_server/analyses/cost_distribution_template.sql b/inst/sql/sql_server/analyses/cost_distribution_template.sql new file mode 100755 index 00000000..cc56b4b1 --- /dev/null +++ b/inst/sql/sql_server/analyses/cost_distribution_template.sql @@ -0,0 +1,59 @@ + +--HINT DISTRIBUTE_ON_KEY(stratum1_id) +select + subject_id as stratum1_id, + CAST(avg(1.0 * @costColumn) AS FLOAT) as avg_value, + CAST(stdev(@costColumn) AS FLOAT) as stdev_value, + min(@costColumn) as min_value, + max(@costColumn) as max_value, + @costColumn as count_value, + count_big(*) as total, + row_number() over (partition by subject_id order by @costColumn) as rn +into #overallStats +from @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_@domainId_cost_raw +where @costColumn is not null +group by subject_id, @costColumn +; + +--HINT DISTRIBUTE_ON_KEY(stratum1_id) +select + s.stratum1_id, + s.count_value, + s.total, + sum(p.total) as accumulated +into #priorStats +from #overallStats s +join #overallStats p + on s.stratum1_id = p.stratum1_id and p.rn <= s.rn +group by s.stratum1_id, s.count_value, s.total, s.rn +; + +--HINT DISTRIBUTE_ON_KEY(stratum_1) +select + @analysisId as analysis_id, + CAST(p.stratum1_id AS VARCHAR(255)) as stratum_1, + null as stratum_2, + null as stratum_3, + null as stratum_4, + null as stratum_5, + o.total as count_value, + o.min_value, + o.max_value, + o.avg_value, + o.stdev_value, + MIN(case when p.accumulated >= .50 * o.total then p.count_value else o.max_value end) as median_value, + MIN(case when p.accumulated >= .10 * o.total then p.count_value else o.max_value end) as p10_value, + MIN(case when p.accumulated >= .25 * o.total then p.count_value else o.max_value end) as p25_value, + MIN(case when p.accumulated >= .75 * o.total then p.count_value else o.max_value end) as p75_value, + MIN(case when p.accumulated >= .90 * o.total then p.count_value else o.max_value end) as p90_value +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_dist_@analysisId +from #priorStats p +join #overallStats o on p.stratum1_id = o.stratum1_id +group by p.stratum1_id, o.total, o.min_value, o.max_value, o.avg_value, o.stdev_value +; + +truncate table #overallStats; +drop table #overallStats; + +truncate table #priorStats; +drop table #priorStats; diff --git a/inst/sql/sql_server/analyses/create_analysis_table.sql b/inst/sql/sql_server/analyses/create_analysis_table.sql new file mode 100755 index 00000000..96416f6a --- /dev/null +++ b/inst/sql/sql_server/analyses/create_analysis_table.sql @@ -0,0 +1,18 @@ +IF OBJECT_ID('@resultsDatabaseSchema.achilles_analysis', 'U') IS NOT NULL + drop table @resultsDatabaseSchema.achilles_analysis; + +with cte_analyses +as +( + @analysesSqls +) +select + analysis_id, + analysis_name, + stratum_1_name, + stratum_2_name, + stratum_3_name, + stratum_4_name, + stratum_5_name +into @resultsDatabaseSchema.ACHILLES_analysis +from cte_analyses; \ No newline at end of file diff --git a/inst/sql/sql_server/analyses/merge_achilles_tables.sql
b/inst/sql/sql_server/analyses/merge_achilles_tables.sql new file mode 100755 index 00000000..e7e020fe --- /dev/null +++ b/inst/sql/sql_server/analyses/merge_achilles_tables.sql @@ -0,0 +1,22 @@ +{@createTable}?{ + IF OBJECT_ID('@resultsDatabaseSchema.achilles_@detailType', 'U') IS NOT NULL + drop table @resultsDatabaseSchema.achilles_@detailType; +} +--HINT DISTRIBUTE_ON_KEY(analysis_id) +with cte_merged +as +( + @detailSqls +) +{!@createTable}?{ + insert into @resultsDatabaseSchema.achilles_@detailType +} +select @fieldNames +{@createTable}?{ + into @resultsDatabaseSchema.achilles_@detailType +} +from cte_merged +{@smallCellCount != ''}?{ + where count_value > @smallCellCount +} +; \ No newline at end of file diff --git a/inst/sql/sql_server/analyses/raw_cost_template.sql b/inst/sql/sql_server/analyses/raw_cost_template.sql new file mode 100755 index 00000000..c285a6e6 --- /dev/null +++ b/inst/sql/sql_server/analyses/raw_cost_template.sql @@ -0,0 +1,34 @@ +IF OBJECT_ID('@scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_@domainId_cost_raw', 'U') IS NOT NULL + DROP TABLE @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_@domainId_cost_raw; + +{@cdmVersion == '5'}?{ + --HINT DISTRIBUTE_ON_KEY(cost_event_id) + select + @domainId_id as cost_event_id, + @costColumns + into #rawCost + from @cdmDatabaseSchema.@domainId_cost + ; +}:{ + --HINT DISTRIBUTE_ON_KEY(cost_event_id) + select + cost_event_id, + @costColumns + into #rawCost + from @cdmDatabaseSchema.cost + where cost_domain_id = '@domainId' + ; +} + +--HINT DISTRIBUTE_ON_KEY(subject_id) +select + B.@domainId_concept_id as subject_id, + @costColumns +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_@domainId_cost_raw +from #rawCost A +join @cdmDatabaseSchema.@domainTable B + on A.cost_event_id = B.@domainTable_id and B.@domainId_concept_id <> 0 +; + +truncate table #rawCost; +drop table #rawCost; \ No newline at end of file diff --git a/inst/sql/sql_server/export_v5/achillesheel/sqlAchillesHeel.sql b/inst/sql/sql_server/export/achillesheel/sqlAchillesHeel.sql old mode 100644 new mode 100755 similarity index 100% rename from inst/sql/sql_server/export_v5/achillesheel/sqlAchillesHeel.sql rename to inst/sql/sql_server/export/achillesheel/sqlAchillesHeel.sql diff --git a/inst/sql/sql_server/export_v5/condition/sqlAgeAtFirstDiagnosis.sql b/inst/sql/sql_server/export/condition/sqlAgeAtFirstDiagnosis.sql old mode 100644 new mode 100755 similarity index 100% rename from inst/sql/sql_server/export_v5/condition/sqlAgeAtFirstDiagnosis.sql rename to inst/sql/sql_server/export/condition/sqlAgeAtFirstDiagnosis.sql diff --git a/inst/sql/sql_server/export_v5/condition/sqlConditionTreemap.sql b/inst/sql/sql_server/export/condition/sqlConditionTreemap.sql old mode 100644 new mode 100755 similarity index 100% rename from inst/sql/sql_server/export_v5/condition/sqlConditionTreemap.sql rename to inst/sql/sql_server/export/condition/sqlConditionTreemap.sql diff --git a/inst/sql/sql_server/export_v5/condition/sqlConditionsByType.sql b/inst/sql/sql_server/export/condition/sqlConditionsByType.sql old mode 100644 new mode 100755 similarity index 100% rename from inst/sql/sql_server/export_v5/condition/sqlConditionsByType.sql rename to inst/sql/sql_server/export/condition/sqlConditionsByType.sql diff --git a/inst/sql/sql_server/export_v5/condition/sqlPrevalenceByGenderAgeYear.sql b/inst/sql/sql_server/export/condition/sqlPrevalenceByGenderAgeYear.sql old mode 100644 new mode 100755 similarity index 100% rename from 
inst/sql/sql_server/export_v5/condition/sqlPrevalenceByGenderAgeYear.sql rename to inst/sql/sql_server/export/condition/sqlPrevalenceByGenderAgeYear.sql diff --git a/inst/sql/sql_server/export_v5/condition/sqlPrevalenceByMonth.sql b/inst/sql/sql_server/export/condition/sqlPrevalenceByMonth.sql old mode 100644 new mode 100755 similarity index 100% rename from inst/sql/sql_server/export_v5/condition/sqlPrevalenceByMonth.sql rename to inst/sql/sql_server/export/condition/sqlPrevalenceByMonth.sql diff --git a/inst/sql/sql_server/export_v5/conditionera/sqlAgeAtFirstDiagnosis.sql b/inst/sql/sql_server/export/conditionera/sqlAgeAtFirstDiagnosis.sql old mode 100644 new mode 100755 similarity index 100% rename from inst/sql/sql_server/export_v5/conditionera/sqlAgeAtFirstDiagnosis.sql rename to inst/sql/sql_server/export/conditionera/sqlAgeAtFirstDiagnosis.sql diff --git a/inst/sql/sql_server/export_v5/conditionera/sqlConditionEraTreemap.sql b/inst/sql/sql_server/export/conditionera/sqlConditionEraTreemap.sql old mode 100644 new mode 100755 similarity index 100% rename from inst/sql/sql_server/export_v5/conditionera/sqlConditionEraTreemap.sql rename to inst/sql/sql_server/export/conditionera/sqlConditionEraTreemap.sql diff --git a/inst/sql/sql_server/export_v5/conditionera/sqlLengthOfEra.sql b/inst/sql/sql_server/export/conditionera/sqlLengthOfEra.sql old mode 100644 new mode 100755 similarity index 100% rename from inst/sql/sql_server/export_v5/conditionera/sqlLengthOfEra.sql rename to inst/sql/sql_server/export/conditionera/sqlLengthOfEra.sql diff --git a/inst/sql/sql_server/export_v5/conditionera/sqlPrevalenceByGenderAgeYear.sql b/inst/sql/sql_server/export/conditionera/sqlPrevalenceByGenderAgeYear.sql old mode 100644 new mode 100755 similarity index 100% rename from inst/sql/sql_server/export_v5/conditionera/sqlPrevalenceByGenderAgeYear.sql rename to inst/sql/sql_server/export/conditionera/sqlPrevalenceByGenderAgeYear.sql diff --git a/inst/sql/sql_server/export_v5/conditionera/sqlPrevalenceByMonth.sql b/inst/sql/sql_server/export/conditionera/sqlPrevalenceByMonth.sql old mode 100644 new mode 100755 similarity index 100% rename from inst/sql/sql_server/export_v5/conditionera/sqlPrevalenceByMonth.sql rename to inst/sql/sql_server/export/conditionera/sqlPrevalenceByMonth.sql diff --git a/inst/sql/sql_server/export_v4/datadensity/conceptsperperson.sql b/inst/sql/sql_server/export/datadensity/conceptsperperson.sql old mode 100644 new mode 100755 similarity index 100% rename from inst/sql/sql_server/export_v4/datadensity/conceptsperperson.sql rename to inst/sql/sql_server/export/datadensity/conceptsperperson.sql diff --git a/inst/sql/sql_server/export_v5/datadensity/recordsperperson.sql b/inst/sql/sql_server/export/datadensity/recordsperperson.sql old mode 100644 new mode 100755 similarity index 100% rename from inst/sql/sql_server/export_v5/datadensity/recordsperperson.sql rename to inst/sql/sql_server/export/datadensity/recordsperperson.sql diff --git a/inst/sql/sql_server/export_v5/datadensity/totalrecords.sql b/inst/sql/sql_server/export/datadensity/totalrecords.sql old mode 100644 new mode 100755 similarity index 100% rename from inst/sql/sql_server/export_v5/datadensity/totalrecords.sql rename to inst/sql/sql_server/export/datadensity/totalrecords.sql diff --git a/inst/sql/sql_server/export_v5/death/sqlAgeAtDeath.sql b/inst/sql/sql_server/export/death/sqlAgeAtDeath.sql old mode 100644 new mode 100755 similarity index 100% rename from inst/sql/sql_server/export_v5/death/sqlAgeAtDeath.sql rename 
to inst/sql/sql_server/export/death/sqlAgeAtDeath.sql diff --git a/inst/sql/sql_server/export_v5/death/sqlDeathByType.sql b/inst/sql/sql_server/export/death/sqlDeathByType.sql old mode 100644 new mode 100755 similarity index 100% rename from inst/sql/sql_server/export_v5/death/sqlDeathByType.sql rename to inst/sql/sql_server/export/death/sqlDeathByType.sql diff --git a/inst/sql/sql_server/export_v5/death/sqlPrevalenceByGenderAgeYear.sql b/inst/sql/sql_server/export/death/sqlPrevalenceByGenderAgeYear.sql old mode 100644 new mode 100755 similarity index 100% rename from inst/sql/sql_server/export_v5/death/sqlPrevalenceByGenderAgeYear.sql rename to inst/sql/sql_server/export/death/sqlPrevalenceByGenderAgeYear.sql diff --git a/inst/sql/sql_server/export_v5/death/sqlPrevalenceByMonth.sql b/inst/sql/sql_server/export/death/sqlPrevalenceByMonth.sql old mode 100644 new mode 100755 similarity index 100% rename from inst/sql/sql_server/export_v5/death/sqlPrevalenceByMonth.sql rename to inst/sql/sql_server/export/death/sqlPrevalenceByMonth.sql diff --git a/inst/sql/sql_server/export_v5/domainmeta/sqlDomainMeta.sql b/inst/sql/sql_server/export/domainmeta/sqlDomainMeta.sql old mode 100644 new mode 100755 similarity index 100% rename from inst/sql/sql_server/export_v5/domainmeta/sqlDomainMeta.sql rename to inst/sql/sql_server/export/domainmeta/sqlDomainMeta.sql diff --git a/inst/sql/sql_server/export_v5/drug/sqlAgeAtFirstExposure.sql b/inst/sql/sql_server/export/drug/sqlAgeAtFirstExposure.sql old mode 100644 new mode 100755 similarity index 100% rename from inst/sql/sql_server/export_v5/drug/sqlAgeAtFirstExposure.sql rename to inst/sql/sql_server/export/drug/sqlAgeAtFirstExposure.sql diff --git a/inst/sql/sql_server/export_v5/drug/sqlDaysSupplyDistribution.sql b/inst/sql/sql_server/export/drug/sqlDaysSupplyDistribution.sql old mode 100644 new mode 100755 similarity index 100% rename from inst/sql/sql_server/export_v5/drug/sqlDaysSupplyDistribution.sql rename to inst/sql/sql_server/export/drug/sqlDaysSupplyDistribution.sql diff --git a/inst/sql/sql_server/export_v5/drug/sqlDrugTreemap.sql b/inst/sql/sql_server/export/drug/sqlDrugTreemap.sql old mode 100644 new mode 100755 similarity index 100% rename from inst/sql/sql_server/export_v5/drug/sqlDrugTreemap.sql rename to inst/sql/sql_server/export/drug/sqlDrugTreemap.sql diff --git a/inst/sql/sql_server/export_v5/drug/sqlDrugsByType.sql b/inst/sql/sql_server/export/drug/sqlDrugsByType.sql old mode 100644 new mode 100755 similarity index 100% rename from inst/sql/sql_server/export_v5/drug/sqlDrugsByType.sql rename to inst/sql/sql_server/export/drug/sqlDrugsByType.sql diff --git a/inst/sql/sql_server/export_v5/drug/sqlFrequencyDistribution.sql b/inst/sql/sql_server/export/drug/sqlFrequencyDistribution.sql old mode 100644 new mode 100755 similarity index 100% rename from inst/sql/sql_server/export_v5/drug/sqlFrequencyDistribution.sql rename to inst/sql/sql_server/export/drug/sqlFrequencyDistribution.sql diff --git a/inst/sql/sql_server/export_v5/drug/sqlPrevalenceByGenderAgeYear.sql b/inst/sql/sql_server/export/drug/sqlPrevalenceByGenderAgeYear.sql old mode 100644 new mode 100755 similarity index 100% rename from inst/sql/sql_server/export_v5/drug/sqlPrevalenceByGenderAgeYear.sql rename to inst/sql/sql_server/export/drug/sqlPrevalenceByGenderAgeYear.sql diff --git a/inst/sql/sql_server/export_v5/drug/sqlPrevalenceByMonth.sql b/inst/sql/sql_server/export/drug/sqlPrevalenceByMonth.sql old mode 100644 new mode 100755 similarity index 100% rename from 
inst/sql/sql_server/export_v5/drug/sqlPrevalenceByMonth.sql rename to inst/sql/sql_server/export/drug/sqlPrevalenceByMonth.sql diff --git a/inst/sql/sql_server/export_v5/drug/sqlQuantityDistribution.sql b/inst/sql/sql_server/export/drug/sqlQuantityDistribution.sql old mode 100644 new mode 100755 similarity index 100% rename from inst/sql/sql_server/export_v5/drug/sqlQuantityDistribution.sql rename to inst/sql/sql_server/export/drug/sqlQuantityDistribution.sql diff --git a/inst/sql/sql_server/export_v5/drug/sqlRefillsDistribution.sql b/inst/sql/sql_server/export/drug/sqlRefillsDistribution.sql old mode 100644 new mode 100755 similarity index 100% rename from inst/sql/sql_server/export_v5/drug/sqlRefillsDistribution.sql rename to inst/sql/sql_server/export/drug/sqlRefillsDistribution.sql diff --git a/inst/sql/sql_server/export_v5/drugera/sqlAgeAtFirstExposure.sql b/inst/sql/sql_server/export/drugera/sqlAgeAtFirstExposure.sql old mode 100644 new mode 100755 similarity index 100% rename from inst/sql/sql_server/export_v5/drugera/sqlAgeAtFirstExposure.sql rename to inst/sql/sql_server/export/drugera/sqlAgeAtFirstExposure.sql diff --git a/inst/sql/sql_server/export_v5/drugera/sqlDrugEraTreemap.sql b/inst/sql/sql_server/export/drugera/sqlDrugEraTreemap.sql old mode 100644 new mode 100755 similarity index 100% rename from inst/sql/sql_server/export_v5/drugera/sqlDrugEraTreemap.sql rename to inst/sql/sql_server/export/drugera/sqlDrugEraTreemap.sql diff --git a/inst/sql/sql_server/export_v5/drugera/sqlLengthOfEra.sql b/inst/sql/sql_server/export/drugera/sqlLengthOfEra.sql old mode 100644 new mode 100755 similarity index 100% rename from inst/sql/sql_server/export_v5/drugera/sqlLengthOfEra.sql rename to inst/sql/sql_server/export/drugera/sqlLengthOfEra.sql diff --git a/inst/sql/sql_server/export_v5/drugera/sqlPrevalenceByGenderAgeYear.sql b/inst/sql/sql_server/export/drugera/sqlPrevalenceByGenderAgeYear.sql old mode 100644 new mode 100755 similarity index 100% rename from inst/sql/sql_server/export_v5/drugera/sqlPrevalenceByGenderAgeYear.sql rename to inst/sql/sql_server/export/drugera/sqlPrevalenceByGenderAgeYear.sql diff --git a/inst/sql/sql_server/export_v5/drugera/sqlPrevalenceByMonth.sql b/inst/sql/sql_server/export/drugera/sqlPrevalenceByMonth.sql old mode 100644 new mode 100755 similarity index 100% rename from inst/sql/sql_server/export_v5/drugera/sqlPrevalenceByMonth.sql rename to inst/sql/sql_server/export/drugera/sqlPrevalenceByMonth.sql diff --git a/inst/sql/sql_server/export_v5/measurement/sqlAgeAtFirstOccurrence.sql b/inst/sql/sql_server/export/measurement/sqlAgeAtFirstOccurrence.sql old mode 100644 new mode 100755 similarity index 100% rename from inst/sql/sql_server/export_v5/measurement/sqlAgeAtFirstOccurrence.sql rename to inst/sql/sql_server/export/measurement/sqlAgeAtFirstOccurrence.sql diff --git a/inst/sql/sql_server/export_v5/measurement/sqlFrequencyDistribution.sql b/inst/sql/sql_server/export/measurement/sqlFrequencyDistribution.sql old mode 100644 new mode 100755 similarity index 100% rename from inst/sql/sql_server/export_v5/measurement/sqlFrequencyDistribution.sql rename to inst/sql/sql_server/export/measurement/sqlFrequencyDistribution.sql diff --git a/inst/sql/sql_server/export_v5/measurement/sqlLowerLimitDistribution.sql b/inst/sql/sql_server/export/measurement/sqlLowerLimitDistribution.sql old mode 100644 new mode 100755 similarity index 100% rename from inst/sql/sql_server/export_v5/measurement/sqlLowerLimitDistribution.sql rename to 
inst/sql/sql_server/export/measurement/sqlLowerLimitDistribution.sql diff --git a/inst/sql/sql_server/export_v5/measurement/sqlMeasurementTreemap.sql b/inst/sql/sql_server/export/measurement/sqlMeasurementTreemap.sql old mode 100644 new mode 100755 similarity index 100% rename from inst/sql/sql_server/export_v5/measurement/sqlMeasurementTreemap.sql rename to inst/sql/sql_server/export/measurement/sqlMeasurementTreemap.sql diff --git a/inst/sql/sql_server/export_v5/measurement/sqlMeasurementValueDistribution.sql b/inst/sql/sql_server/export/measurement/sqlMeasurementValueDistribution.sql old mode 100644 new mode 100755 similarity index 100% rename from inst/sql/sql_server/export_v5/measurement/sqlMeasurementValueDistribution.sql rename to inst/sql/sql_server/export/measurement/sqlMeasurementValueDistribution.sql diff --git a/inst/sql/sql_server/export_v5/measurement/sqlMeasurementsByType.sql b/inst/sql/sql_server/export/measurement/sqlMeasurementsByType.sql old mode 100644 new mode 100755 similarity index 100% rename from inst/sql/sql_server/export_v5/measurement/sqlMeasurementsByType.sql rename to inst/sql/sql_server/export/measurement/sqlMeasurementsByType.sql diff --git a/inst/sql/sql_server/export_v5/measurement/sqlPrevalenceByGenderAgeYear.sql b/inst/sql/sql_server/export/measurement/sqlPrevalenceByGenderAgeYear.sql old mode 100644 new mode 100755 similarity index 100% rename from inst/sql/sql_server/export_v5/measurement/sqlPrevalenceByGenderAgeYear.sql rename to inst/sql/sql_server/export/measurement/sqlPrevalenceByGenderAgeYear.sql diff --git a/inst/sql/sql_server/export_v5/measurement/sqlPrevalenceByMonth.sql b/inst/sql/sql_server/export/measurement/sqlPrevalenceByMonth.sql old mode 100644 new mode 100755 similarity index 100% rename from inst/sql/sql_server/export_v5/measurement/sqlPrevalenceByMonth.sql rename to inst/sql/sql_server/export/measurement/sqlPrevalenceByMonth.sql diff --git a/inst/sql/sql_server/export_v5/measurement/sqlRecordsByUnit.sql b/inst/sql/sql_server/export/measurement/sqlRecordsByUnit.sql old mode 100644 new mode 100755 similarity index 100% rename from inst/sql/sql_server/export_v5/measurement/sqlRecordsByUnit.sql rename to inst/sql/sql_server/export/measurement/sqlRecordsByUnit.sql diff --git a/inst/sql/sql_server/export_v5/measurement/sqlUpperLimitDistribution.sql b/inst/sql/sql_server/export/measurement/sqlUpperLimitDistribution.sql old mode 100644 new mode 100755 similarity index 100% rename from inst/sql/sql_server/export_v5/measurement/sqlUpperLimitDistribution.sql rename to inst/sql/sql_server/export/measurement/sqlUpperLimitDistribution.sql diff --git a/inst/sql/sql_server/export_v5/measurement/sqlValuesRelativeToNorm.sql b/inst/sql/sql_server/export/measurement/sqlValuesRelativeToNorm.sql old mode 100644 new mode 100755 similarity index 100% rename from inst/sql/sql_server/export_v5/measurement/sqlValuesRelativeToNorm.sql rename to inst/sql/sql_server/export/measurement/sqlValuesRelativeToNorm.sql diff --git a/inst/sql/sql_server/export_v5/observation/sqlAgeAtFirstOccurrence.sql b/inst/sql/sql_server/export/observation/sqlAgeAtFirstOccurrence.sql old mode 100644 new mode 100755 similarity index 100% rename from inst/sql/sql_server/export_v5/observation/sqlAgeAtFirstOccurrence.sql rename to inst/sql/sql_server/export/observation/sqlAgeAtFirstOccurrence.sql diff --git a/inst/sql/sql_server/export_v5/observation/sqlFrequencyDistribution.sql b/inst/sql/sql_server/export/observation/sqlFrequencyDistribution.sql old mode 100644 new mode 100755 similarity 
index 100%
rename from inst/sql/sql_server/export_v5/observation/sqlFrequencyDistribution.sql
rename to inst/sql/sql_server/export/observation/sqlFrequencyDistribution.sql
diff --git a/inst/sql/sql_server/export_v5/observation/sqlObservationTreemap.sql b/inst/sql/sql_server/export/observation/sqlObservationTreemap.sql
old mode 100644
new mode 100755
similarity index 100%
rename from inst/sql/sql_server/export_v5/observation/sqlObservationTreemap.sql
rename to inst/sql/sql_server/export/observation/sqlObservationTreemap.sql
diff --git a/inst/sql/sql_server/export_v5/observation/sqlObservationsByType.sql b/inst/sql/sql_server/export/observation/sqlObservationsByType.sql
old mode 100644
new mode 100755
similarity index 100%
rename from inst/sql/sql_server/export_v5/observation/sqlObservationsByType.sql
rename to inst/sql/sql_server/export/observation/sqlObservationsByType.sql
diff --git a/inst/sql/sql_server/export_v5/observation/sqlPrevalenceByGenderAgeYear.sql b/inst/sql/sql_server/export/observation/sqlPrevalenceByGenderAgeYear.sql
old mode 100644
new mode 100755
similarity index 100%
rename from inst/sql/sql_server/export_v5/observation/sqlPrevalenceByGenderAgeYear.sql
rename to inst/sql/sql_server/export/observation/sqlPrevalenceByGenderAgeYear.sql
diff --git a/inst/sql/sql_server/export_v5/observation/sqlPrevalenceByMonth.sql b/inst/sql/sql_server/export/observation/sqlPrevalenceByMonth.sql
old mode 100644
new mode 100755
similarity index 100%
rename from inst/sql/sql_server/export_v5/observation/sqlPrevalenceByMonth.sql
rename to inst/sql/sql_server/export/observation/sqlPrevalenceByMonth.sql
diff --git a/inst/sql/sql_server/export_v5/observationperiod/ageatfirst.sql b/inst/sql/sql_server/export/observationperiod/ageatfirst.sql
old mode 100644
new mode 100755
similarity index 100%
rename from inst/sql/sql_server/export_v5/observationperiod/ageatfirst.sql
rename to inst/sql/sql_server/export/observationperiod/ageatfirst.sql
diff --git a/inst/sql/sql_server/export_v5/observationperiod/agebygender.sql b/inst/sql/sql_server/export/observationperiod/agebygender.sql
old mode 100644
new mode 100755
similarity index 100%
rename from inst/sql/sql_server/export_v5/observationperiod/agebygender.sql
rename to inst/sql/sql_server/export/observationperiod/agebygender.sql
diff --git a/inst/sql/sql_server/export_v5/observationperiod/cumulativeduration.sql b/inst/sql/sql_server/export/observationperiod/cumulativeduration.sql
old mode 100644
new mode 100755
similarity index 100%
rename from inst/sql/sql_server/export_v5/observationperiod/cumulativeduration.sql
rename to inst/sql/sql_server/export/observationperiod/cumulativeduration.sql
diff --git a/inst/sql/sql_server/export_v5/observationperiod/observationlength_data.sql b/inst/sql/sql_server/export/observationperiod/observationlength_data.sql
old mode 100644
new mode 100755
similarity index 100%
rename from inst/sql/sql_server/export_v5/observationperiod/observationlength_data.sql
rename to inst/sql/sql_server/export/observationperiod/observationlength_data.sql
diff --git a/inst/sql/sql_server/export_v4/observationperiod/observationlength_stats.sql b/inst/sql/sql_server/export/observationperiod/observationlength_stats.sql
old mode 100644
new mode 100755
similarity index 100%
rename from inst/sql/sql_server/export_v4/observationperiod/observationlength_stats.sql
rename to inst/sql/sql_server/export/observationperiod/observationlength_stats.sql
diff --git a/inst/sql/sql_server/export_v5/observationperiod/observationlengthbyage.sql b/inst/sql/sql_server/export/observationperiod/observationlengthbyage.sql
old mode 100644
new mode 100755
similarity index 100%
rename from inst/sql/sql_server/export_v5/observationperiod/observationlengthbyage.sql
rename to inst/sql/sql_server/export/observationperiod/observationlengthbyage.sql
diff --git a/inst/sql/sql_server/export_v5/observationperiod/observationlengthbygender.sql b/inst/sql/sql_server/export/observationperiod/observationlengthbygender.sql
old mode 100644
new mode 100755
similarity index 100%
rename from inst/sql/sql_server/export_v5/observationperiod/observationlengthbygender.sql
rename to inst/sql/sql_server/export/observationperiod/observationlengthbygender.sql
diff --git a/inst/sql/sql_server/export_v5/observationperiod/observedbymonth.sql b/inst/sql/sql_server/export/observationperiod/observedbymonth.sql
old mode 100644
new mode 100755
similarity index 100%
rename from inst/sql/sql_server/export_v5/observationperiod/observedbymonth.sql
rename to inst/sql/sql_server/export/observationperiod/observedbymonth.sql
diff --git a/inst/sql/sql_server/export_v5/observationperiod/observedbyyear_data.sql b/inst/sql/sql_server/export/observationperiod/observedbyyear_data.sql
old mode 100644
new mode 100755
similarity index 100%
rename from inst/sql/sql_server/export_v5/observationperiod/observedbyyear_data.sql
rename to inst/sql/sql_server/export/observationperiod/observedbyyear_data.sql
diff --git a/inst/sql/sql_server/export_v4/observationperiod/observedbyyear_stats.sql b/inst/sql/sql_server/export/observationperiod/observedbyyear_stats.sql
old mode 100644
new mode 100755
similarity index 100%
rename from inst/sql/sql_server/export_v4/observationperiod/observedbyyear_stats.sql
rename to inst/sql/sql_server/export/observationperiod/observedbyyear_stats.sql
diff --git a/inst/sql/sql_server/export_v4/observationperiod/periodsperperson.sql b/inst/sql/sql_server/export/observationperiod/periodsperperson.sql
old mode 100644
new mode 100755
similarity index 100%
rename from inst/sql/sql_server/export_v4/observationperiod/periodsperperson.sql
rename to inst/sql/sql_server/export/observationperiod/periodsperperson.sql
diff --git a/inst/sql/sql_server/export_v5/person/ethnicity.sql b/inst/sql/sql_server/export/person/ethnicity.sql
old mode 100644
new mode 100755
similarity index 100%
rename from inst/sql/sql_server/export_v5/person/ethnicity.sql
rename to inst/sql/sql_server/export/person/ethnicity.sql
diff --git a/inst/sql/sql_server/export_v5/person/gender.sql b/inst/sql/sql_server/export/person/gender.sql
old mode 100644
new mode 100755
similarity index 100%
rename from inst/sql/sql_server/export_v5/person/gender.sql
rename to inst/sql/sql_server/export/person/gender.sql
diff --git a/inst/sql/sql_server/export_v4/person/population.sql b/inst/sql/sql_server/export/person/population.sql
old mode 100644
new mode 100755
similarity index 100%
rename from inst/sql/sql_server/export_v4/person/population.sql
rename to inst/sql/sql_server/export/person/population.sql
diff --git a/inst/sql/sql_server/export_v5/person/race.sql b/inst/sql/sql_server/export/person/race.sql
old mode 100644
new mode 100755
similarity index 100%
rename from inst/sql/sql_server/export_v5/person/race.sql
rename to inst/sql/sql_server/export/person/race.sql
diff --git a/inst/sql/sql_server/export_v5/person/yearofbirth_data.sql b/inst/sql/sql_server/export/person/yearofbirth_data.sql
old mode 100644
new mode 100755
similarity index 100%
rename from inst/sql/sql_server/export_v5/person/yearofbirth_data.sql
rename to inst/sql/sql_server/export/person/yearofbirth_data.sql
diff --git a/inst/sql/sql_server/export_v4/person/yearofbirth_stats.sql b/inst/sql/sql_server/export/person/yearofbirth_stats.sql
old mode 100644
new mode 100755
similarity index 100%
rename from inst/sql/sql_server/export_v4/person/yearofbirth_stats.sql
rename to inst/sql/sql_server/export/person/yearofbirth_stats.sql
diff --git a/inst/sql/sql_server/export_v5/procedure/sqlAgeAtFirstOccurrence.sql b/inst/sql/sql_server/export/procedure/sqlAgeAtFirstOccurrence.sql
old mode 100644
new mode 100755
similarity index 100%
rename from inst/sql/sql_server/export_v5/procedure/sqlAgeAtFirstOccurrence.sql
rename to inst/sql/sql_server/export/procedure/sqlAgeAtFirstOccurrence.sql
diff --git a/inst/sql/sql_server/export_v5/procedure/sqlFrequencyDistribution.sql b/inst/sql/sql_server/export/procedure/sqlFrequencyDistribution.sql
old mode 100644
new mode 100755
similarity index 100%
rename from inst/sql/sql_server/export_v5/procedure/sqlFrequencyDistribution.sql
rename to inst/sql/sql_server/export/procedure/sqlFrequencyDistribution.sql
diff --git a/inst/sql/sql_server/export_v5/procedure/sqlPrevalenceByGenderAgeYear.sql b/inst/sql/sql_server/export/procedure/sqlPrevalenceByGenderAgeYear.sql
old mode 100644
new mode 100755
similarity index 100%
rename from inst/sql/sql_server/export_v5/procedure/sqlPrevalenceByGenderAgeYear.sql
rename to inst/sql/sql_server/export/procedure/sqlPrevalenceByGenderAgeYear.sql
diff --git a/inst/sql/sql_server/export_v5/procedure/sqlPrevalenceByMonth.sql b/inst/sql/sql_server/export/procedure/sqlPrevalenceByMonth.sql
old mode 100644
new mode 100755
similarity index 100%
rename from inst/sql/sql_server/export_v5/procedure/sqlPrevalenceByMonth.sql
rename to inst/sql/sql_server/export/procedure/sqlPrevalenceByMonth.sql
diff --git a/inst/sql/sql_server/export_v5/procedure/sqlProcedureTreemap.sql b/inst/sql/sql_server/export/procedure/sqlProcedureTreemap.sql
old mode 100644
new mode 100755
similarity index 100%
rename from inst/sql/sql_server/export_v5/procedure/sqlProcedureTreemap.sql
rename to inst/sql/sql_server/export/procedure/sqlProcedureTreemap.sql
diff --git a/inst/sql/sql_server/export_v5/procedure/sqlProceduresByType.sql b/inst/sql/sql_server/export/procedure/sqlProceduresByType.sql
old mode 100644
new mode 100755
similarity index 100%
rename from inst/sql/sql_server/export_v5/procedure/sqlProceduresByType.sql
rename to inst/sql/sql_server/export/procedure/sqlProceduresByType.sql
diff --git a/inst/sql/sql_server/export_v5/visit/sqlAgeAtFirstOccurrence.sql b/inst/sql/sql_server/export/visit/sqlAgeAtFirstOccurrence.sql
old mode 100644
new mode 100755
similarity index 100%
rename from inst/sql/sql_server/export_v5/visit/sqlAgeAtFirstOccurrence.sql
rename to inst/sql/sql_server/export/visit/sqlAgeAtFirstOccurrence.sql
diff --git a/inst/sql/sql_server/export_v5/visit/sqlPrevalenceByGenderAgeYear.sql b/inst/sql/sql_server/export/visit/sqlPrevalenceByGenderAgeYear.sql
old mode 100644
new mode 100755
similarity index 100%
rename from inst/sql/sql_server/export_v5/visit/sqlPrevalenceByGenderAgeYear.sql
rename to inst/sql/sql_server/export/visit/sqlPrevalenceByGenderAgeYear.sql
diff --git a/inst/sql/sql_server/export_v5/visit/sqlPrevalenceByMonth.sql b/inst/sql/sql_server/export/visit/sqlPrevalenceByMonth.sql
old mode 100644
new mode 100755
similarity index 100%
rename from inst/sql/sql_server/export_v5/visit/sqlPrevalenceByMonth.sql
rename to inst/sql/sql_server/export/visit/sqlPrevalenceByMonth.sql
diff --git
a/inst/sql/sql_server/export_v5/visit/sqlVisitDurationByType.sql b/inst/sql/sql_server/export/visit/sqlVisitDurationByType.sql old mode 100644 new mode 100755 similarity index 100% rename from inst/sql/sql_server/export_v5/visit/sqlVisitDurationByType.sql rename to inst/sql/sql_server/export/visit/sqlVisitDurationByType.sql diff --git a/inst/sql/sql_server/export_v5/visit/sqlVisitTreemap.sql b/inst/sql/sql_server/export/visit/sqlVisitTreemap.sql old mode 100644 new mode 100755 similarity index 100% rename from inst/sql/sql_server/export_v5/visit/sqlVisitTreemap.sql rename to inst/sql/sql_server/export/visit/sqlVisitTreemap.sql diff --git a/inst/sql/sql_server/export_v4/achillesheel/sqlAchillesHeel.sql b/inst/sql/sql_server/export_v4/achillesheel/sqlAchillesHeel.sql deleted file mode 100644 index 6b2b8152..00000000 --- a/inst/sql/sql_server/export_v4/achillesheel/sqlAchillesHeel.sql +++ /dev/null @@ -1,3 +0,0 @@ -select analysis_id as AttributeName, ACHILLES_HEEL_warning as AttributeValue -from @results_database_schema.ACHILLES_HEEL_results -order by case when left(ACHILLES_HEEL_warning,5) = 'Error' then 1 else 2 end, analysis_id diff --git a/inst/sql/sql_server/export_v4/condition/sqlAgeAtFirstDiagnosis.sql b/inst/sql/sql_server/export_v4/condition/sqlAgeAtFirstDiagnosis.sql deleted file mode 100644 index 63d1186f..00000000 --- a/inst/sql/sql_server/export_v4/condition/sqlAgeAtFirstDiagnosis.sql +++ /dev/null @@ -1,17 +0,0 @@ - select c1.concept_id as concept_id, - c2.concept_name as category, - ard1.min_value as min_value, - ard1.p10_value as p10_value, - ard1.p25_value as p25_value, - ard1.median_value as median_value, - ard1.p75_value as p75_value, - ard1.p90_value as p90_value, - ard1.max_value as max_value - from @results_database_schema.ACHILLES_results_dist ard1 - inner join - @vocab_database_schema.concept c1 - on CAST(ard1.stratum_1 AS INT) = c1.concept_id - inner join - @vocab_database_schema.concept c2 - on CAST(ard1.stratum_2 AS INT) = c2.concept_id - where ard1.analysis_id = 406 \ No newline at end of file diff --git a/inst/sql/sql_server/export_v4/condition/sqlConditionTreemap.sql b/inst/sql/sql_server/export_v4/condition/sqlConditionTreemap.sql deleted file mode 100644 index 7d882f8f..00000000 --- a/inst/sql/sql_server/export_v4/condition/sqlConditionTreemap.sql +++ /dev/null @@ -1,105 +0,0 @@ -select concept_hierarchy.concept_id, - isNull(concept_hierarchy.soc_concept_name,'NA') + '||' + isNull(concept_hierarchy.hlgt_concept_name,'NA') + '||' + isNull(concept_hierarchy.hlt_concept_name,'NA') + '||' + isNull(concept_hierarchy.pt_concept_name,'NA') + '||' + isNull(concept_hierarchy.snomed_concept_name,'NA') concept_path, - ar1.count_value as num_persons, - round(1.0*ar1.count_value / denom.count_value,5) as percent_persons, - round(1.0*ar2.count_value / ar1.count_value,5) as records_per_person -from (select * from @results_database_schema.ACHILLES_results where analysis_id = 400) ar1 - inner join - (select * from @results_database_schema.ACHILLES_results where analysis_id = 401) ar2 - on ar1.stratum_1 = ar2.stratum_1 - inner join - ( - select snomed.concept_id, - snomed.concept_name as snomed_concept_name, - pt_to_hlt.pt_concept_name, - hlt_to_hlgt.hlt_concept_name, - hlgt_to_soc.hlgt_concept_name, - soc.concept_name as soc_concept_name - from - ( - select concept_id, concept_name - from @vocab_database_schema.concept - where vocabulary_id = 1 - ) snomed - left join - (select c1.concept_id as snomed_concept_id, max(c2.concept_id) as pt_concept_id - from - 
@vocab_database_schema.concept c1 - inner join - @vocab_database_schema.concept_ancestor ca1 - on c1.concept_id = ca1.descendant_concept_id - and c1.vocabulary_id = 1 - inner join - @vocab_database_schema.concept c2 - on ca1.ancestor_concept_id = c2.concept_id - and c2.vocabulary_id = 15 - and c2.concept_class = 'Preferred Term' - group by c1.concept_id - ) snomed_to_pt - on snomed.concept_id = snomed_to_pt.snomed_concept_id - - left join - (select c1.concept_id as pt_concept_id, c1.concept_name as pt_concept_name, max(c2.concept_id) as hlt_concept_id - from - @vocab_database_schema.concept c1 - inner join - @vocab_database_schema.concept_ancestor ca1 - on c1.concept_id = ca1.descendant_concept_id - and c1.vocabulary_id = 15 - and c1.concept_class = 'Preferred Term' - inner join - @vocab_database_schema.concept c2 - on ca1.ancestor_concept_id = c2.concept_id - and c2.vocabulary_id = 15 - and c2.concept_class = 'High Level Term' - group by c1.concept_id, c1.concept_name - ) pt_to_hlt - on snomed_to_pt.pt_concept_id = pt_to_hlt.pt_concept_id - - left join - (select c1.concept_id as hlt_concept_id, c1.concept_name as hlt_concept_name, max(c2.concept_id) as hlgt_concept_id - from - @vocab_database_schema.concept c1 - inner join - @vocab_database_schema.concept_ancestor ca1 - on c1.concept_id = ca1.descendant_concept_id - and c1.vocabulary_id = 15 - and c1.concept_class = 'High Level Term' - inner join - @vocab_database_schema.concept c2 - on ca1.ancestor_concept_id = c2.concept_id - and c2.vocabulary_id = 15 - and c2.concept_class = 'High Level Group Term' - group by c1.concept_id, c1.concept_name - ) hlt_to_hlgt - on pt_to_hlt.hlt_concept_id = hlt_to_hlgt.hlt_concept_id - - left join - (select c1.concept_id as hlgt_concept_id, c1.concept_name as hlgt_concept_name, max(c2.concept_id) as soc_concept_id - from - @vocab_database_schema.concept c1 - inner join - @vocab_database_schema.concept_ancestor ca1 - on c1.concept_id = ca1.descendant_concept_id - and c1.vocabulary_id = 15 - and c1.concept_class = 'High Level Group Term' - inner join - @vocab_database_schema.concept c2 - on ca1.ancestor_concept_id = c2.concept_id - and c2.vocabulary_id = 15 - and c2.concept_class = 'System Organ Class' - group by c1.concept_id, c1.concept_name - ) hlgt_to_soc - on hlt_to_hlgt.hlgt_concept_id = hlgt_to_soc.hlgt_concept_id - - left join @vocab_database_schema.concept soc - on hlgt_to_soc.soc_concept_id = soc.concept_id - - - - ) concept_hierarchy - on CAST(ar1.stratum_1 AS INT) = concept_hierarchy.concept_id - , - (select count_value from @results_database_schema.ACHILLES_results where analysis_id = 1) denom - -order by ar1.count_value desc diff --git a/inst/sql/sql_server/export_v4/condition/sqlConditionsByType.sql b/inst/sql/sql_server/export_v4/condition/sqlConditionsByType.sql deleted file mode 100644 index a1d60288..00000000 --- a/inst/sql/sql_server/export_v4/condition/sqlConditionsByType.sql +++ /dev/null @@ -1,38 +0,0 @@ -select c1.concept_id as condition_concept_id, - c1.concept_name as condition_concept_name, - c2.concept_group_id as concept_id, - c2.concept_group_name as concept_name, - sum(ar1.count_value) as count_value -from @results_database_schema.ACHILLES_results ar1 - inner join - @vocab_database_schema.concept c1 - on CAST(ar1.stratum_1 AS INT) = c1.concept_id - inner join - ( - select concept_id, - case when concept_name like 'Inpatient%' then 10 - when concept_name like 'Outpatient%' then 20 - else concept_id end - + - case when (concept_name like 'Inpatient%' or concept_name like 
'Outpatient%' ) and (concept_name like '%primary%' or concept_name like '%1st position%') then 1 - when (concept_name like 'Inpatient%' or concept_name like 'Outpatient%' ) and (concept_name not like '%primary%' and concept_name not like '%1st position%') then 2 - else 0 end as concept_group_id, - case when concept_name like 'Inpatient%' then 'Claim- Inpatient: ' - when concept_name like 'Outpatient%' then 'Claim- Outpatient: ' - else concept_name end - + - '' - + - case when (concept_name like 'Inpatient%' or concept_name like 'Outpatient%' ) and (concept_name like '%primary%' or concept_name like '%1st position%') then 'Primary diagnosis' - when (concept_name like 'Inpatient%' or concept_name like 'Outpatient%' ) and (concept_name not like '%primary%' and concept_name not like '%1st position%') then 'Secondary diagnosis' - else '' end as concept_group_name - from @vocab_database_schema.concept - where vocabulary_id = 37 - - ) c2 - on CAST(ar1.stratum_2 AS INT) = c2.concept_id -where ar1.analysis_id = 405 -group by c1.concept_id, - c1.concept_name, - c2.concept_group_id, - c2.concept_group_name \ No newline at end of file diff --git a/inst/sql/sql_server/export_v4/condition/sqlPrevalenceByGenderAgeYear.sql b/inst/sql/sql_server/export_v4/condition/sqlPrevalenceByGenderAgeYear.sql deleted file mode 100644 index e73dfa0a..00000000 --- a/inst/sql/sql_server/export_v4/condition/sqlPrevalenceByGenderAgeYear.sql +++ /dev/null @@ -1,35 +0,0 @@ -SELECT c1.concept_id AS concept_id, - c1.concept_name as concept_name, - cast(num_stratum_4 * 10 AS VARCHAR) + '-' + cast((num_stratum_4 + 1) * 10 - 1 AS VARCHAR) AS trellis_name, --age decile - c2.concept_name AS series_name, --gender - num_stratum_2 AS x_calendar_year, -- calendar year, note, there could be blanks - ROUND(1000 * (1.0 * num_count_value / denom_count_value), 5) AS y_prevalence_1000pp --prevalence, per 1000 persons -FROM ( - SELECT CAST(num.stratum_1 AS INT) AS num_stratum_1, - CAST(num.stratum_2 AS INT) AS num_stratum_2, - CAST(num.stratum_3 AS INT) AS num_stratum_3, - CAST(num.stratum_4 AS INT) AS num_stratum_4, - num.count_value AS num_count_value, - denom.count_value AS denom_count_value - FROM ( - SELECT * - FROM @results_database_schema.ACHILLES_results - WHERE analysis_id = 404 - AND stratum_3 IN ('8507', '8532') - ) num - INNER JOIN ( - SELECT * - FROM @results_database_schema.ACHILLES_results - WHERE analysis_id = 116 - AND stratum_2 IN ('8507', '8532') - ) denom - ON num.stratum_2 = denom.stratum_1 - AND num.stratum_3 = denom.stratum_2 - AND num.stratum_4 = denom.stratum_3 - ) tmp -INNER JOIN @vocab_database_schema.concept c1 - ON num_stratum_1 = c1.concept_id -INNER JOIN @vocab_database_schema.concept c2 - ON num_stratum_3 = c2.concept_id -ORDER BY c1.concept_id, - num_stratum_2 diff --git a/inst/sql/sql_server/export_v4/condition/sqlPrevalenceByMonth.sql b/inst/sql/sql_server/export_v4/condition/sqlPrevalenceByMonth.sql deleted file mode 100644 index 08d2ea8a..00000000 --- a/inst/sql/sql_server/export_v4/condition/sqlPrevalenceByMonth.sql +++ /dev/null @@ -1,13 +0,0 @@ - select c1.concept_id as concept_id, - c1.concept_name as concept_name, - num.stratum_2 as x_calendar_month, -- calendar year, note, there could be blanks - round(1000*(1.0*num.count_value/denom.count_value),5) as y_prevalence_1000pp --prevalence, per 1000 persons - from - (select * from @results_database_schema.ACHILLES_results where analysis_id = 402) num - inner join - (select * from @results_database_schema.ACHILLES_results where analysis_id = 117) denom - 
on num.stratum_2 = denom.stratum_1 --calendar year - inner join - @vocab_database_schema.concept c1 - on CAST(num.stratum_1 AS INT) = c1.concept_id -ORDER BY CAST(num.stratum_2 as INT) \ No newline at end of file diff --git a/inst/sql/sql_server/export_v4/conditionera/sqlAgeAtFirstDiagnosis.sql b/inst/sql/sql_server/export_v4/conditionera/sqlAgeAtFirstDiagnosis.sql deleted file mode 100644 index 28e2ac9f..00000000 --- a/inst/sql/sql_server/export_v4/conditionera/sqlAgeAtFirstDiagnosis.sql +++ /dev/null @@ -1,16 +0,0 @@ -select c1.concept_id as concept_id, - c2.concept_name as category, - ard1.min_value as min_value, - ard1.p10_value as p10_value, - ard1.p25_value as p25_value, - ard1.median_value as median_value, - ard1.p75_value as p75_value, - ard1.p90_value as p90_value, - ard1.max_value as max_value -from @results_database_schema.ACHILLES_results_dist ard1 - inner join @vocab_database_schema.concept c1 - on CAST(ard1.stratum_1 AS INT) = c1.concept_id - inner join @vocab_database_schema.concept c2 - on CAST(ard1.stratum_2 AS INT) = c2.concept_id -where ard1.analysis_id = 1006 -and ard1.count_value > 0 diff --git a/inst/sql/sql_server/export_v4/conditionera/sqlConditionEraTreemap.sql b/inst/sql/sql_server/export_v4/conditionera/sqlConditionEraTreemap.sql deleted file mode 100644 index 88f5be52..00000000 --- a/inst/sql/sql_server/export_v4/conditionera/sqlConditionEraTreemap.sql +++ /dev/null @@ -1,101 +0,0 @@ -select concept_hierarchy.concept_id, - isNull(concept_hierarchy.soc_concept_name,'NA') + '||' + isNull(concept_hierarchy.hlgt_concept_name,'NA') + '||' + isNull(concept_hierarchy.hlt_concept_name, 'NA') + '||' + isNull(concept_hierarchy.pt_concept_name,'NA') + '||' + isNull(concept_hierarchy.snomed_concept_name,'NA') concept_path, - ar1.count_value as num_persons, - ROUND(1.0*ar1.count_value / denom.count_value,5) as percent_persons, - ROUND(ar2.avg_value,5) as length_of_era -from (select * from @results_database_schema.ACHILLES_results where analysis_id = 1000) ar1 - inner join - (select stratum_1, avg_value from @results_database_schema.ACHILLES_results_dist where analysis_id = 1007) ar2 - on ar1.stratum_1 = ar2.stratum_1 - inner join - ( - select snomed.concept_id, - snomed.concept_name as snomed_concept_name, - pt_to_hlt.pt_concept_name, - hlt_to_hlgt.hlt_concept_name, - hlgt_to_soc.hlgt_concept_name, - soc.concept_name as soc_concept_name - from - ( - select concept_id, concept_name - from @vocab_database_schema.concept - where vocabulary_id = 1 - ) snomed - left join - (select c1.concept_id as snomed_concept_id, max(c2.concept_id) as pt_concept_id - from - @vocab_database_schema.concept c1 - inner join - @vocab_database_schema.concept_ancestor ca1 - on c1.concept_id = ca1.descendant_concept_id - and c1.vocabulary_id = 1 - inner join - @vocab_database_schema.concept c2 - on ca1.ancestor_concept_id = c2.concept_id - and c2.vocabulary_id = 15 - and c2.concept_class = 'Preferred Term' - group by c1.concept_id - ) snomed_to_pt - on snomed.concept_id = snomed_to_pt.snomed_concept_id - - left join - (select c1.concept_id as pt_concept_id, c1.concept_name as pt_concept_name, max(c2.concept_id) as hlt_concept_id - from - @vocab_database_schema.concept c1 - inner join - @vocab_database_schema.concept_ancestor ca1 - on c1.concept_id = ca1.descendant_concept_id - and c1.vocabulary_id = 15 - and c1.concept_class = 'Preferred Term' - inner join - @vocab_database_schema.concept c2 - on ca1.ancestor_concept_id = c2.concept_id - and c2.vocabulary_id = 15 - and c2.concept_class = 'High Level 
Term' - group by c1.concept_id, c1.concept_name - ) pt_to_hlt - on snomed_to_pt.pt_concept_id = pt_to_hlt.pt_concept_id - - left join - (select c1.concept_id as hlt_concept_id, c1.concept_name as hlt_concept_name, max(c2.concept_id) as hlgt_concept_id - from - @vocab_database_schema.concept c1 - inner join - @vocab_database_schema.concept_ancestor ca1 - on c1.concept_id = ca1.descendant_concept_id - and c1.vocabulary_id = 15 - and c1.concept_class = 'High Level Term' - inner join - @vocab_database_schema.concept c2 - on ca1.ancestor_concept_id = c2.concept_id - and c2.vocabulary_id = 15 - and c2.concept_class = 'High Level Group Term' - group by c1.concept_id, c1.concept_name - ) hlt_to_hlgt - on pt_to_hlt.hlt_concept_id = hlt_to_hlgt.hlt_concept_id - - left join - (select c1.concept_id as hlgt_concept_id, c1.concept_name as hlgt_concept_name, max(c2.concept_id) as soc_concept_id - from - @vocab_database_schema.concept c1 - inner join - @vocab_database_schema.concept_ancestor ca1 - on c1.concept_id = ca1.descendant_concept_id - and c1.vocabulary_id = 15 - and c1.concept_class = 'High Level Group Term' - inner join - @vocab_database_schema.concept c2 - on ca1.ancestor_concept_id = c2.concept_id - and c2.vocabulary_id = 15 - and c2.concept_class = 'System Organ Class' - group by c1.concept_id, c1.concept_name - ) hlgt_to_soc - on hlt_to_hlgt.hlgt_concept_id = hlgt_to_soc.hlgt_concept_id - left join @vocab_database_schema.concept soc - on hlgt_to_soc.soc_concept_id = soc.concept_id - ) concept_hierarchy - on CAST(ar1.stratum_1 AS INT) = concept_hierarchy.concept_id - , - (select count_value from @results_database_schema.ACHILLES_results where analysis_id = 1) denom - -order by ar1.count_value desc \ No newline at end of file diff --git a/inst/sql/sql_server/export_v4/conditionera/sqlLengthOfEra.sql b/inst/sql/sql_server/export_v4/conditionera/sqlLengthOfEra.sql deleted file mode 100644 index 9c86d6ff..00000000 --- a/inst/sql/sql_server/export_v4/conditionera/sqlLengthOfEra.sql +++ /dev/null @@ -1,14 +0,0 @@ -select c1.concept_id as concept_id, - 'Length of era' as category, - ard1.min_value as min_value, - ard1.p10_value as p10_value, - ard1.p25_value as p25_value, - ard1.median_value as median_value, - ard1.p75_value as p75_value, - ard1.p90_value as p90_value, - ard1.max_value as max_value -from @results_database_schema.ACHILLES_results_dist ard1 - inner join @vocab_database_schema.concept c1 on CAST(ard1.stratum_1 as INT) = c1.concept_id -where ard1.analysis_id = 1007 and ard1.count_value > 0 - - diff --git a/inst/sql/sql_server/export_v4/conditionera/sqlPrevalenceByGenderAgeYear.sql b/inst/sql/sql_server/export_v4/conditionera/sqlPrevalenceByGenderAgeYear.sql deleted file mode 100644 index a225e19a..00000000 --- a/inst/sql/sql_server/export_v4/conditionera/sqlPrevalenceByGenderAgeYear.sql +++ /dev/null @@ -1,34 +0,0 @@ -SELECT c1.concept_id AS concept_id, - cast(num_stratum_4 * 10 AS VARCHAR) + '-' + cast((num_stratum_4 + 1) * 10 - 1 AS VARCHAR) AS trellis_name, --age decile - c2.concept_name AS series_name, --gender - num_stratum_2 AS x_calendar_year, -- calendar year, note, there could be blanks - ROUND(1000 * (1.0 * num_count_value / denom_count_value), 5) AS y_prevalence_1000pp --prevalence, per 1000 persons -FROM ( - SELECT CAST(num.stratum_1 AS INT) AS num_stratum_1, - CAST(num.stratum_2 AS INT) AS num_stratum_2, - CAST(num.stratum_3 AS INT) AS num_stratum_3, - CAST(num.stratum_4 AS INT) AS num_stratum_4, - num.count_value AS num_count_value, - denom.count_value AS denom_count_value 
- FROM ( - SELECT * - FROM @results_database_schema.ACHILLES_results - WHERE analysis_id = 1004 - AND stratum_3 IN ('8507', '8532') - ) num - INNER JOIN ( - SELECT * - FROM @results_database_schema.ACHILLES_results - WHERE analysis_id = 116 - AND stratum_2 IN ('8507', '8532') - ) denom - ON num.stratum_2 = denom.stratum_1 - AND num.stratum_3 = denom.stratum_2 - AND num.stratum_4 = denom.stratum_3 - ) tmp -INNER JOIN @vocab_database_schema.concept c1 - ON num_stratum_1 = c1.concept_id -INNER JOIN @vocab_database_schema.concept c2 - ON num_stratum_3 = c2.concept_id -ORDER BY c1.concept_id, - num_stratum_2 diff --git a/inst/sql/sql_server/export_v4/conditionera/sqlPrevalenceByMonth.sql b/inst/sql/sql_server/export_v4/conditionera/sqlPrevalenceByMonth.sql deleted file mode 100644 index 6081db57..00000000 --- a/inst/sql/sql_server/export_v4/conditionera/sqlPrevalenceByMonth.sql +++ /dev/null @@ -1,10 +0,0 @@ -select c1.concept_id as concept_id, - num.stratum_2 as x_calendar_month, - round(1000*(1.0*num.count_value/denom.count_value),5) as y_prevalence_1000pp -from - (select * from @results_database_schema.ACHILLES_results where analysis_id = 1002) num - inner join - (select * from @results_database_schema.ACHILLES_results where analysis_id = 117) denom - on num.stratum_2 = denom.stratum_1 --calendar year - inner join - @vocab_database_schema.concept c1 on CAST(num.stratum_1 as INT) = c1.concept_id \ No newline at end of file diff --git a/inst/sql/sql_server/export_v4/datadensity/recordsperperson.sql b/inst/sql/sql_server/export_v4/datadensity/recordsperperson.sql deleted file mode 100644 index 7e6a1787..00000000 --- a/inst/sql/sql_server/export_v4/datadensity/recordsperperson.sql +++ /dev/null @@ -1,27 +0,0 @@ -select t1.table_name as SERIES_NAME, - t1.stratum_1 as X_CALENDAR_MONTH, - round(1.0*t1.count_value/denom.count_value,5) as Y_RECORD_COUNT -from -( - select 'Visit occurrence' as table_name, stratum_1, count_value from @results_database_schema.ACHILLES_results where analysis_id = 220 - union all - select 'Condition occurrence' as table_name, stratum_1, count_value from @results_database_schema.ACHILLES_results where analysis_id = 420 - union all - select 'Death' as table_name, stratum_1, count_value from @results_database_schema.ACHILLES_results where analysis_id = 502 - union all - select 'Procedure occurrence' as table_name, stratum_1, count_value from @results_database_schema.ACHILLES_results where analysis_id = 620 - union all - select 'Drug exposure' as table_name, stratum_1, count_value from @results_database_schema.ACHILLES_results where analysis_id = 720 - union all - select 'Observation' as table_name, stratum_1, count_value from @results_database_schema.ACHILLES_results where analysis_id = 820 - union all - select 'Drug era' as table_name, stratum_1, count_value from @results_database_schema.ACHILLES_results where analysis_id = 920 - union all - select 'Condition era' as table_name, stratum_1, count_value from @results_database_schema.ACHILLES_results where analysis_id = 1020 - union all - select 'Observation period' as table_name, stratum_1, count_value from @results_database_schema.ACHILLES_results where analysis_id = 111 -) t1 -inner join -(select * from @results_database_schema.ACHILLES_results where analysis_id = 117) denom -on t1.stratum_1 = denom.stratum_1 -ORDER BY SERIES_NAME, CAST(t1.stratum_1 as INT) diff --git a/inst/sql/sql_server/export_v4/datadensity/totalrecords.sql b/inst/sql/sql_server/export_v4/datadensity/totalrecords.sql deleted file mode 100644 index 
78d7508f..00000000 --- a/inst/sql/sql_server/export_v4/datadensity/totalrecords.sql +++ /dev/null @@ -1,24 +0,0 @@ -select table_name as SERIES_NAME, - stratum_1 as X_CALENDAR_MONTH, - count_value as Y_RECORD_COUNT -from -( - select 'Visit occurrence' as table_name, stratum_1, count_value from @results_database_schema.ACHILLES_results where analysis_id = 220 - union all - select 'Condition occurrence' as table_name, stratum_1, count_value from @results_database_schema.ACHILLES_results where analysis_id = 420 - union all - select 'Death' as table_name, stratum_1, count_value from @results_database_schema.ACHILLES_results where analysis_id = 502 - union all - select 'Procedure occurrence' as table_name, stratum_1, count_value from @results_database_schema.ACHILLES_results where analysis_id = 620 - union all - select 'Drug exposure' as table_name, stratum_1, count_value from @results_database_schema.ACHILLES_results where analysis_id = 720 - union all - select 'Observation' as table_name, stratum_1, count_value from @results_database_schema.ACHILLES_results where analysis_id = 820 - union all - select 'Drug era' as table_name, stratum_1, count_value from @results_database_schema.ACHILLES_results where analysis_id = 920 - union all - select 'Condition era' as table_name, stratum_1, count_value from @results_database_schema.ACHILLES_results where analysis_id = 1020 - union all - select 'Observation period' as table_name, stratum_1, count_value from @results_database_schema.ACHILLES_results where analysis_id = 111 -) t1 -ORDER BY SERIES_NAME, CAST(stratum_1 as INT) \ No newline at end of file diff --git a/inst/sql/sql_server/export_v4/death/sqlAgeAtDeath.sql b/inst/sql/sql_server/export_v4/death/sqlAgeAtDeath.sql deleted file mode 100644 index ce3ef4a7..00000000 --- a/inst/sql/sql_server/export_v4/death/sqlAgeAtDeath.sql +++ /dev/null @@ -1,12 +0,0 @@ -select c2.concept_name as category, - ard1.min_value as min_value, - ard1.p10_value as P10_value, - ard1.p25_value as P25_value, - ard1.median_value as median_value, - ard1.p75_value as P75_value, - ard1.p90_value as P90_value, - ard1.max_value as max_value -from @results_database_schema.ACHILLES_results_dist ard1 - inner join - @vocab_database_schema.concept c2 on CAST(ard1.stratum_1 as INT) = c2.concept_id -where ard1.analysis_id = 506 \ No newline at end of file diff --git a/inst/sql/sql_server/export_v4/death/sqlDeathByType.sql b/inst/sql/sql_server/export_v4/death/sqlDeathByType.sql deleted file mode 100644 index 09e6bfb5..00000000 --- a/inst/sql/sql_server/export_v4/death/sqlDeathByType.sql +++ /dev/null @@ -1,6 +0,0 @@ -select c2.concept_id as concept_id, - c2.concept_name as concept_name, - ar1.count_value as count_value -from @results_database_schema.ACHILLES_results ar1 - inner join @vocab_database_schema.concept c2 on CAST(ar1.stratum_1 AS INT) = c2.concept_id -where ar1.analysis_id = 505 \ No newline at end of file diff --git a/inst/sql/sql_server/export_v4/death/sqlPrevalenceByGenderAgeYear.sql b/inst/sql/sql_server/export_v4/death/sqlPrevalenceByGenderAgeYear.sql deleted file mode 100644 index 86032e1e..00000000 --- a/inst/sql/sql_server/export_v4/death/sqlPrevalenceByGenderAgeYear.sql +++ /dev/null @@ -1,13 +0,0 @@ -select cast(cast(num.stratum_3 as int)*10 as varchar) + '-' + cast((cast(num.stratum_3 as int)+1)*10-1 as varchar) as trellis_name, --age decile - c2.concept_name as series_name, --gender - num.stratum_1 as x_calendar_year, -- calendar year, note, there could be blanks - 
ROUND(1000*(1.0*num.count_value/denom.count_value),5) as y_prevalence_1000pp --prevalence, per 1000 persons -from - (select * from @results_database_schema.ACHILLES_results where analysis_id = 504) num - inner join - (select * from @results_database_schema.ACHILLES_results where analysis_id = 116) denom on num.stratum_1 = denom.stratum_1 --calendar year - and num.stratum_2 = denom.stratum_2 --gender - and num.stratum_3 = denom.stratum_3 --age decile - inner join @vocab_database_schema.concept c2 on CAST(num.stratum_2 as INT) = c2.concept_id -where c2.concept_id in (8507, 8532) -ORDER BY CAST(num.stratum_1 as INT) diff --git a/inst/sql/sql_server/export_v4/death/sqlPrevalenceByMonth.sql b/inst/sql/sql_server/export_v4/death/sqlPrevalenceByMonth.sql deleted file mode 100644 index 067c1c45..00000000 --- a/inst/sql/sql_server/export_v4/death/sqlPrevalenceByMonth.sql +++ /dev/null @@ -1,7 +0,0 @@ -select num.stratum_1 as x_calendar_month, -- calendar year, note, there could be blanks - 1000*(1.0*num.count_value/denom.count_value) as y_prevalence_1000pp --prevalence, per 1000 persons -from - (select * from @results_database_schema.ACHILLES_results where analysis_id = 502) num - inner join - (select * from @results_database_schema.ACHILLES_results where analysis_id = 117) denom on num.stratum_1 = denom.stratum_1 --calendar year -ORDER BY CAST(num.stratum_1 as INT) diff --git a/inst/sql/sql_server/export_v4/drug/sqlAgeAtFirstExposure.sql b/inst/sql/sql_server/export_v4/drug/sqlAgeAtFirstExposure.sql deleted file mode 100644 index 94964450..00000000 --- a/inst/sql/sql_server/export_v4/drug/sqlAgeAtFirstExposure.sql +++ /dev/null @@ -1,18 +0,0 @@ -select c1.concept_id as drug_concept_id, - c2.concept_name as category, - ard1.min_value as min_value, - ard1.p10_value as p10_value, - ard1.p25_value as p25_value, - ard1.median_value as median_value, - ard1.p75_value as p75_value, - ard1.p90_value as p90_value, - ard1.max_value as max_value -from @results_database_schema.ACHILLES_results_dist ard1 - inner join - @vocab_database_schema.concept c1 - on ard1.stratum_1 = CAST(c1.concept_id AS VARCHAR) - inner join - @vocab_database_schema.concept c2 - on ard1.stratum_2 = CAST(c2.concept_id AS VARCHAR) -where ard1.analysis_id = 706 -and ard1.count_value > 0 \ No newline at end of file diff --git a/inst/sql/sql_server/export_v4/drug/sqlDaysSupplyDistribution.sql b/inst/sql/sql_server/export_v4/drug/sqlDaysSupplyDistribution.sql deleted file mode 100644 index e98caf6d..00000000 --- a/inst/sql/sql_server/export_v4/drug/sqlDaysSupplyDistribution.sql +++ /dev/null @@ -1,15 +0,0 @@ -select c1.concept_id as drug_concept_id, - 'Days supply' as category, - ard1.min_value as min_value, - ard1.p10_value as p10_value, - ard1.p25_value as p25_value, - ard1.median_value as median_value, - ard1.p75_value as p75_value, - ard1.p90_value as p90_value, - ard1.max_value as max_value -from @results_database_schema.ACHILLES_results_dist ard1 - inner join - @vocab_database_schema.concept c1 - on CAST(ard1.stratum_1 AS INT) = c1.concept_id -where ard1.analysis_id = 715 -and ard1.count_value > 0 \ No newline at end of file diff --git a/inst/sql/sql_server/export_v4/drug/sqlDrugTreemap.sql b/inst/sql/sql_server/export_v4/drug/sqlDrugTreemap.sql deleted file mode 100644 index e0c1aa0a..00000000 --- a/inst/sql/sql_server/export_v4/drug/sqlDrugTreemap.sql +++ /dev/null @@ -1,98 +0,0 @@ -select concept_hierarchy.concept_id, - isnull(concept_hierarchy.atc1_concept_name,'NA') + '||' + - isnull(concept_hierarchy.atc3_concept_name,'NA') + 
'||' + - isnull(concept_hierarchy.atc5_concept_name,'NA') + '||' + - isnull(concept_hierarchy.rxnorm_ingredient_concept_name,'NA') + '||' + - concept_hierarchy.rxnorm_concept_name concept_path, - ar1.count_value as num_persons, - round(1.0*ar1.count_value / denom.count_value,5) as percent_persons, - round(1.0*ar2.count_value / ar1.count_value,5) as records_per_person -from (select * from @results_database_schema.ACHILLES_results where analysis_id = 700) ar1 - inner join - (select * from @results_database_schema.ACHILLES_results where analysis_id = 701) ar2 - on ar1.stratum_1 = ar2.stratum_1 - inner join - ( - select rxnorm.concept_id, - rxnorm.concept_name as rxnorm_concept_name, - rxnorm.rxnorm_ingredient_concept_name, - atc5_to_atc3.atc5_concept_name, - atc3_to_atc1.atc3_concept_name, - atc1.concept_name as atc1_concept_name - from - ( - select c1.concept_id, - c1.concept_name, - c2.concept_id as rxnorm_ingredient_concept_id, - c2.concept_name as RxNorm_ingredient_concept_name - from @vocab_database_schema.concept c1 - inner join @vocab_database_schema.concept_ancestor ca1 - on c1.concept_id = ca1.descendant_concept_id - and c1.vocabulary_id in (8,82) - inner join @vocab_database_schema.concept c2 - on ca1.ancestor_concept_id = c2.concept_id - and c2.vocabulary_id in (8,82) - and c2.concept_class = 'Ingredient' - ) rxnorm - left join - (select c1.concept_id as rxnorm_ingredient_concept_id, max(c2.concept_id) as atc5_concept_id - from - @vocab_database_schema.concept c1 - inner join - @vocab_database_schema.concept_ancestor ca1 - on c1.concept_id = ca1.descendant_concept_id - and c1.vocabulary_id in (8,82) - and c1.concept_class = 'Ingredient' - inner join - @vocab_database_schema.concept c2 - on ca1.ancestor_concept_id = c2.concept_id - and c2.vocabulary_id = 21 - and len(c2.concept_code) = 5 - group by c1.concept_id - ) rxnorm_to_atc5 - on rxnorm.rxnorm_ingredient_concept_id = rxnorm_to_atc5.rxnorm_ingredient_concept_id - - left join - (select c1.concept_id as atc5_concept_id, c1.concept_name as atc5_concept_name, max(c2.concept_id) as atc3_concept_id - from - @vocab_database_schema.concept c1 - inner join - @vocab_database_schema.concept_ancestor ca1 - on c1.concept_id = ca1.descendant_concept_id - and c1.vocabulary_id = 21 - and len(c1.concept_code) = 5 - inner join - @vocab_database_schema.concept c2 - on ca1.ancestor_concept_id = c2.concept_id - and c2.vocabulary_id = 21 - and len(c2.concept_code) = 3 - group by c1.concept_id, c1.concept_name - ) atc5_to_atc3 - on rxnorm_to_atc5.atc5_concept_id = atc5_to_atc3.atc5_concept_id - - left join - (select c1.concept_id as atc3_concept_id, c1.concept_name as atc3_concept_name, max(c2.concept_id) as atc1_concept_id - from - @vocab_database_schema.concept c1 - inner join - @vocab_database_schema.concept_ancestor ca1 - on c1.concept_id = ca1.descendant_concept_id - and c1.vocabulary_id = 21 - and len(c1.concept_code) = 3 - inner join - @vocab_database_schema.concept c2 - on ca1.ancestor_concept_id = c2.concept_id - and c2.vocabulary_id = 21 - and len(c2.concept_code) = 1 - group by c1.concept_id, c1.concept_name - ) atc3_to_atc1 - on atc5_to_atc3.atc3_concept_id = atc3_to_atc1.atc3_concept_id - - left join @vocab_database_schema.concept atc1 - on atc3_to_atc1.atc1_concept_id = atc1.concept_id - ) concept_hierarchy - on ar1.stratum_1 = CAST(concept_hierarchy.concept_id AS VARCHAR) - , - (select count_value from @results_database_schema.ACHILLES_results where analysis_id = 1) denom - -order by ar1.count_value desc diff --git 
a/inst/sql/sql_server/export_v4/drug/sqlDrugsByType.sql b/inst/sql/sql_server/export_v4/drug/sqlDrugsByType.sql deleted file mode 100644 index c6f4ee1d..00000000 --- a/inst/sql/sql_server/export_v4/drug/sqlDrugsByType.sql +++ /dev/null @@ -1,12 +0,0 @@ -select c1.concept_id as drug_concept_id, - c2.concept_id as concept_id, - c2.concept_name as concept_name, - ar1.count_value as count_value -from ( - select cast(stratum_1 as int) stratum_1, cast(stratum_2 as int) stratum_2, count_value - FROM @results_database_schema.ACHILLES_results - where analysis_id = 705 - GROUP BY analysis_id, stratum_1, stratum_2, count_value -) ar1 -inner join @vocab_database_schema.concept c1 on ar1.stratum_1 = c1.concept_id -inner join @vocab_database_schema.concept c2 on ar1.stratum_2 = c2.concept_id diff --git a/inst/sql/sql_server/export_v4/drug/sqlPrevalenceByGenderAgeYear.sql b/inst/sql/sql_server/export_v4/drug/sqlPrevalenceByGenderAgeYear.sql deleted file mode 100644 index 22a0f724..00000000 --- a/inst/sql/sql_server/export_v4/drug/sqlPrevalenceByGenderAgeYear.sql +++ /dev/null @@ -1,35 +0,0 @@ -SELECT c1.concept_id AS concept_id, - c1.concept_name as concept_name, - cast(num_stratum_4 * 10 AS VARCHAR) + '-' + cast((num_stratum_4 + 1) * 10 - 1 AS VARCHAR) AS trellis_name, --age decile - c2.concept_name AS series_name, --gender - num_stratum_2 AS x_calendar_year, -- calendar year, note, there could be blanks - ROUND(1000 * (1.0 * num_count_value / denom_count_value), 5) AS y_prevalence_1000pp --prevalence, per 1000 persons -FROM ( - SELECT CAST(num.stratum_1 AS INT) AS num_stratum_1, - CAST(num.stratum_2 AS INT) AS num_stratum_2, - CAST(num.stratum_3 AS INT) AS num_stratum_3, - CAST(num.stratum_4 AS INT) AS num_stratum_4, - num.count_value AS num_count_value, - denom.count_value AS denom_count_value - FROM ( - SELECT * - FROM @results_database_schema.ACHILLES_results - WHERE analysis_id = 704 - AND stratum_3 IN ('8507', '8532') - ) num - INNER JOIN ( - SELECT * - FROM @results_database_schema.ACHILLES_results - WHERE analysis_id = 116 - AND stratum_2 IN ('8507', '8532') - ) denom - ON num.stratum_2 = denom.stratum_1 - AND num.stratum_3 = denom.stratum_2 - AND num.stratum_4 = denom.stratum_3 - ) tmp -INNER JOIN @vocab_database_schema.concept c1 - ON num_stratum_1 = c1.concept_id -INNER JOIN @vocab_database_schema.concept c2 - ON num_stratum_3 = c2.concept_id -ORDER BY c1.concept_id, - num_stratum_2 diff --git a/inst/sql/sql_server/export_v4/drug/sqlPrevalenceByMonth.sql b/inst/sql/sql_server/export_v4/drug/sqlPrevalenceByMonth.sql deleted file mode 100644 index 8567fa17..00000000 --- a/inst/sql/sql_server/export_v4/drug/sqlPrevalenceByMonth.sql +++ /dev/null @@ -1,13 +0,0 @@ -select c1.concept_id as concept_id, - c1.concept_name as concept_name, - num.stratum_2 as x_calendar_month, - round(1000*(1.0*num.count_value/denom.count_value),5) as y_prevalence_1000pp -from - (select * from @results_database_schema.ACHILLES_results where analysis_id = 702) num - inner join - (select * from @results_database_schema.ACHILLES_results where analysis_id = 117) denom - on num.stratum_2 = denom.stratum_1 --calendar year - inner join - @vocab_database_schema.concept c1 - on CAST(num.stratum_1 AS INT) = c1.concept_id -ORDER BY CAST(num.stratum_2 as INT) \ No newline at end of file diff --git a/inst/sql/sql_server/export_v4/drug/sqlQuantityDistribution.sql b/inst/sql/sql_server/export_v4/drug/sqlQuantityDistribution.sql deleted file mode 100644 index 819dd54f..00000000 --- 
a/inst/sql/sql_server/export_v4/drug/sqlQuantityDistribution.sql +++ /dev/null @@ -1,15 +0,0 @@ -select c1.concept_id as drug_concept_id, - 'Quantity' as category, - ard1.min_value as min_value, - ard1.p10_value as p10_value, - ard1.p25_value as p25_value, - ard1.median_value as median_value, - ard1.p75_value as p75_value, - ard1.p90_value as p90_value, - ard1.max_value as max_value -from @results_database_schema.ACHILLES_results_dist ard1 - inner join - @vocab_database_schema.concept c1 - on CAST(ard1.stratum_1 AS INT) = c1.concept_id -where ard1.analysis_id = 717 -and ard1.count_value > 0 \ No newline at end of file diff --git a/inst/sql/sql_server/export_v4/drug/sqlRefillsDistribution.sql b/inst/sql/sql_server/export_v4/drug/sqlRefillsDistribution.sql deleted file mode 100644 index 13e5f850..00000000 --- a/inst/sql/sql_server/export_v4/drug/sqlRefillsDistribution.sql +++ /dev/null @@ -1,15 +0,0 @@ -select c1.concept_id as drug_concept_id, - 'Refills' as category, - ard1.min_value as min_value, - ard1.p10_value as p10_value, - ard1.p25_value as p25_value, - ard1.median_value as median_value, - ard1.p75_value as p75_value, - ard1.p90_value as p90_value, - ard1.max_value as max_value -from @results_database_schema.ACHILLES_results_dist ard1 - inner join - @vocab_database_schema.concept c1 - on CAST(ard1.stratum_1 AS INT) = c1.concept_id -where ard1.analysis_id = 716 -and ard1.count_value > 0 \ No newline at end of file diff --git a/inst/sql/sql_server/export_v4/drugera/sqlAgeAtFirstExposure.sql b/inst/sql/sql_server/export_v4/drugera/sqlAgeAtFirstExposure.sql deleted file mode 100644 index d1a6822b..00000000 --- a/inst/sql/sql_server/export_v4/drugera/sqlAgeAtFirstExposure.sql +++ /dev/null @@ -1,18 +0,0 @@ -select c1.concept_id as concept_id, - c2.concept_name as category, - ard1.min_value as min_value, - ard1.p10_value as p10_value, - ard1.p25_value as p25_value, - ard1.median_value as median_value, - ard1.p75_value as p75_value, - ard1.p90_value as p90_value, - ard1.max_value as max_value -from @results_database_schema.ACHILLES_results_dist ard1 - inner join - @vocab_database_schema.concept c1 - on ard1.stratum_1 = CAST(c1.concept_id as VARCHAR) - inner join - @vocab_database_schema.concept c2 - on ard1.stratum_2 = cast(c2.concept_id as VARCHAR) -where ard1.analysis_id = 906 -and ard1.count_value > 0 \ No newline at end of file diff --git a/inst/sql/sql_server/export_v4/drugera/sqlDrugEraTreemap.sql b/inst/sql/sql_server/export_v4/drugera/sqlDrugEraTreemap.sql deleted file mode 100644 index cb83fb91..00000000 --- a/inst/sql/sql_server/export_v4/drugera/sqlDrugEraTreemap.sql +++ /dev/null @@ -1,85 +0,0 @@ -select concept_hierarchy.rxnorm_ingredient_concept_id concept_id, - isnull(concept_hierarchy.atc1_concept_name,'NA') + '||' + - isnull(concept_hierarchy.atc3_concept_name,'NA') + '||' + - isnull(concept_hierarchy.atc5_concept_name,'NA') + '||' + - isnull(concept_hierarchy.rxnorm_ingredient_concept_name,'||') concept_path, - ar1.count_value as num_persons, - 1.0*ar1.count_value / denom.count_value as percent_persons, - ar2.avg_value as length_of_era -from (select * from @results_database_schema.ACHILLES_results where analysis_id = 900) ar1 - inner join - (select stratum_1, avg_value from @results_database_schema.ACHILLES_results_dist where analysis_id = 907) ar2 - on ar1.stratum_1 = ar2.stratum_1 - inner join - ( - select rxnorm.rxnorm_ingredient_concept_id, - rxnorm.rxnorm_ingredient_concept_name, - atc5_to_atc3.atc5_concept_name, - atc3_to_atc1.atc3_concept_name, - atc1.concept_name 
as atc1_concept_name - from - ( - select c1.concept_id as rxnorm_ingredient_concept_id, - c1.concept_name as RxNorm_ingredient_concept_name - from @vocab_database_schema.concept c1 - where c1.vocabulary_id in (8,82) - and c1.concept_class = 'Ingredient' - ) rxnorm - left join - (select c1.concept_id as rxnorm_ingredient_concept_id, max(c2.concept_id) as atc5_concept_id - from - @vocab_database_schema.concept c1 - inner join - @vocab_database_schema.concept_ancestor ca1 - on c1.concept_id = ca1.descendant_concept_id - and c1.vocabulary_id in (8,82) - and c1.concept_class = 'Ingredient' - inner join - @vocab_database_schema.concept c2 - on ca1.ancestor_concept_id = c2.concept_id - and c2.vocabulary_id = 21 - and len(c2.concept_code) = 5 - group by c1.concept_id - ) rxnorm_to_atc5 - on rxnorm.rxnorm_ingredient_concept_id = rxnorm_to_atc5.rxnorm_ingredient_concept_id - left join - (select c1.concept_id as atc5_concept_id, c1.concept_name as atc5_concept_name, max(c2.concept_id) as atc3_concept_id - from - @vocab_database_schema.concept c1 - inner join - @vocab_database_schema.concept_ancestor ca1 - on c1.concept_id = ca1.descendant_concept_id - and c1.vocabulary_id = 21 - and len(c1.concept_code) = 5 - inner join - @vocab_database_schema.concept c2 - on ca1.ancestor_concept_id = c2.concept_id - and c2.vocabulary_id = 21 - and len(c2.concept_code) = 3 - group by c1.concept_id, c1.concept_name - ) atc5_to_atc3 - on rxnorm_to_atc5.atc5_concept_id = atc5_to_atc3.atc5_concept_id - left join - (select c1.concept_id as atc3_concept_id, c1.concept_name as atc3_concept_name, max(c2.concept_id) as atc1_concept_id - from - @vocab_database_schema.concept c1 - inner join - @vocab_database_schema.concept_ancestor ca1 - on c1.concept_id = ca1.descendant_concept_id - and c1.vocabulary_id = 21 - and len(c1.concept_code) = 3 - inner join - @vocab_database_schema.concept c2 - on ca1.ancestor_concept_id = c2.concept_id - and c2.vocabulary_id = 21 - and len(c2.concept_code) = 1 - group by c1.concept_id, c1.concept_name - ) atc3_to_atc1 - on atc5_to_atc3.atc3_concept_id = atc3_to_atc1.atc3_concept_id - left join @vocab_database_schema.concept atc1 - on atc3_to_atc1.atc1_concept_id = atc1.concept_id - ) concept_hierarchy - on ar1.stratum_1 = CAST(concept_hierarchy.rxnorm_ingredient_concept_id AS VARCHAR) - , - (select count_value from @results_database_schema.ACHILLES_results where analysis_id = 1) denom -order by ar1.count_value desc diff --git a/inst/sql/sql_server/export_v4/drugera/sqlLengthOfEra.sql b/inst/sql/sql_server/export_v4/drugera/sqlLengthOfEra.sql deleted file mode 100644 index 9916915b..00000000 --- a/inst/sql/sql_server/export_v4/drugera/sqlLengthOfEra.sql +++ /dev/null @@ -1,15 +0,0 @@ -select c1.concept_id as concept_id, - 'Length of Era' as category, - ard1.min_value as min_value, - ard1.p10_value as p10_value, - ard1.p25_value as p25_value, - ard1.median_value as median_value, - ard1.p75_value as p75_value, - ard1.p90_value as p90_value, - ard1.max_value as max_value -from @results_database_schema.ACHILLES_results_dist ard1 - inner join - @vocab_database_schema.concept c1 - on ard1.stratum_1 = CAST(c1.concept_id as VARCHAR) -where ard1.analysis_id = 907 -and ard1.count_value > 0 \ No newline at end of file diff --git a/inst/sql/sql_server/export_v4/drugera/sqlPrevalenceByGenderAgeYear.sql b/inst/sql/sql_server/export_v4/drugera/sqlPrevalenceByGenderAgeYear.sql deleted file mode 100644 index 0c2be95b..00000000 --- a/inst/sql/sql_server/export_v4/drugera/sqlPrevalenceByGenderAgeYear.sql +++ 
/dev/null @@ -1,34 +0,0 @@ -SELECT c1.concept_id AS concept_id, - cast(num_stratum_4 * 10 AS VARCHAR) + '-' + cast((num_stratum_4 + 1) * 10 - 1 AS VARCHAR) AS trellis_name, --age decile - c2.concept_name AS series_name, --gender - num_stratum_2 AS x_calendar_year, -- calendar year, note, there could be blanks - ROUND(1000 * (1.0 * num_count_value / denom_count_value), 5) AS y_prevalence_1000pp --prevalence, per 1000 persons -FROM ( - SELECT CAST(num.stratum_1 AS INT) AS num_stratum_1, - CAST(num.stratum_2 AS INT) AS num_stratum_2, - CAST(num.stratum_3 AS INT) AS num_stratum_3, - CAST(num.stratum_4 AS INT) AS num_stratum_4, - num.count_value AS num_count_value, - denom.count_value AS denom_count_value - FROM ( - SELECT * - FROM @results_database_schema.ACHILLES_results - WHERE analysis_id = 904 - AND stratum_3 IN ('8507', '8532') - ) num - INNER JOIN ( - SELECT * - FROM @results_database_schema.ACHILLES_results - WHERE analysis_id = 116 - AND stratum_2 IN ('8507', '8532') - ) denom - ON num.stratum_2 = denom.stratum_1 - AND num.stratum_3 = denom.stratum_2 - AND num.stratum_4 = denom.stratum_3 - ) tmp -INNER JOIN @vocab_database_schema.concept c1 - ON num_stratum_1 = c1.concept_id -INNER JOIN @vocab_database_schema.concept c2 - ON num_stratum_3 = c2.concept_id -ORDER BY c1.concept_id, - num_stratum_2 diff --git a/inst/sql/sql_server/export_v4/drugera/sqlPrevalenceByMonth.sql b/inst/sql/sql_server/export_v4/drugera/sqlPrevalenceByMonth.sql deleted file mode 100644 index 12fb72bd..00000000 --- a/inst/sql/sql_server/export_v4/drugera/sqlPrevalenceByMonth.sql +++ /dev/null @@ -1,12 +0,0 @@ -select c1.concept_id as concept_id, - num.stratum_2 as x_calendar_month, - round(1000*(1.0*num.count_value/denom.count_value),5) as y_prevalence_1000pp -from - (select * from @results_database_schema.ACHILLES_results where analysis_id = 902) num - inner join - (select * from @results_database_schema.ACHILLES_results where analysis_id = 117) denom - on num.stratum_2 = denom.stratum_1 - inner join - @vocab_database_schema.concept c1 - on num.stratum_1 = CAST(c1.concept_id as VARCHAR) -ORDER BY CAST(num.stratum_2 as INT) \ No newline at end of file diff --git a/inst/sql/sql_server/export_v4/observation/sqlAgeAtFirstOccurrence.sql b/inst/sql/sql_server/export_v4/observation/sqlAgeAtFirstOccurrence.sql deleted file mode 100644 index 6fe9120c..00000000 --- a/inst/sql/sql_server/export_v4/observation/sqlAgeAtFirstOccurrence.sql +++ /dev/null @@ -1,13 +0,0 @@ -select c1.concept_id as CONCEPT_ID, - c2.concept_name as CATEGORY, - ard1.min_value as MIN_VALUE, - ard1.p10_value as P10_VALUE, - ard1.p25_value as P25_VALUE, - ard1.median_value as MEDIAN_VALUE, - ard1.p75_value as P75_VALUE, - ard1.p90_value as P90_VALUE, - ard1.max_value as MAX_VALUE -from @results_database_schema.ACHILLES_results_dist ard1 - inner join @vocab_database_schema.concept c1 on ard1.stratum_1 = CAST(c1.concept_id as VARCHAR) - inner join @vocab_database_schema.concept c2 on ard1.stratum_2 = CAST(c2.concept_id as VARCHAR) -where ard1.analysis_id = 806 diff --git a/inst/sql/sql_server/export_v4/observation/sqlLowerLimitDistribution.sql b/inst/sql/sql_server/export_v4/observation/sqlLowerLimitDistribution.sql deleted file mode 100644 index 50a1447a..00000000 --- a/inst/sql/sql_server/export_v4/observation/sqlLowerLimitDistribution.sql +++ /dev/null @@ -1,14 +0,0 @@ -select c1.concept_id as concept_id, - c2.concept_name as category, - ard1.min_value as min_value, - ard1.p10_value as P10_value, - ard1.p25_value as P25_value, - ard1.median_value as 
median_value, - ard1.p75_value as P75_value, - ard1.p90_value as P90_value, - ard1.max_value as max_value -from @results_database_schema.ACHILLES_results_dist ard1 - inner join @vocab_database_schema.concept c1 on ard1.stratum_1 = CAST(c1.concept_id AS VARCHAR) - inner join @vocab_database_schema.concept c2 on ard1.stratum_2 = cast(c2.concept_id AS VARCHAR) -where ard1.analysis_id = 816 -and ard1.count_value > 0 diff --git a/inst/sql/sql_server/export_v4/observation/sqlObservationTreemap.sql b/inst/sql/sql_server/export_v4/observation/sqlObservationTreemap.sql deleted file mode 100644 index 75c26932..00000000 --- a/inst/sql/sql_server/export_v4/observation/sqlObservationTreemap.sql +++ /dev/null @@ -1,30 +0,0 @@ -select concept_hierarchy.concept_id, - isNull(concept_hierarchy.level3_concept_name,'NA') - + '||' + isNull(concept_hierarchy.level2_concept_name,'NA') - + '||' + isNull(concept_hierarchy.level1_concept_name,'NA') - + '||' + isNull(concept_hierarchy.concept_name, 'NA') as concept_path, - ar1.count_value as num_persons, - 1.0*ar1.count_value / denom.count_value as percent_persons, - 1.0*ar2.count_value / ar1.count_value as records_per_person -from (select * from @results_database_schema.ACHILLES_results where analysis_id = 800) ar1 - inner join - (select * from @results_database_schema.ACHILLES_results where analysis_id = 801) ar2 - on ar1.stratum_1 = ar2.stratum_1 - inner join - ( - select obs.concept_id, obs.concept_name, max(c1.concept_name) as level1_concept_name, max(c2.concept_name) as level2_concept_name, max(c3.concept_name) as level3_concept_name - from - ( - select concept_id, concept_name - from @vocab_database_schema.concept - where vocabulary_id = 6 - ) obs left join @vocab_database_schema.concept_ancestor ca1 on obs.concept_id = ca1.DESCENDANT_CONCEPT_ID and ca1.min_levels_of_separation = 1 - left join @vocab_database_schema.concept c1 on ca1.ANCESTOR_CONCEPT_ID = c1.concept_id - left join @vocab_database_schema.concept_ancestor ca2 on c1.concept_id = ca2.DESCENDANT_CONCEPT_ID and ca2.min_levels_of_separation = 1 - left join @vocab_database_schema.concept c2 on ca2.ANCESTOR_CONCEPT_ID = c2.concept_id - left join @vocab_database_schema.concept_ancestor ca3 on c2.concept_id = ca3.DESCENDANT_CONCEPT_ID and ca3.min_levels_of_separation = 1 - left join @vocab_database_schema.concept c3 on ca3.ANCESTOR_CONCEPT_ID = c3.concept_id - group by obs.concept_id, obs.concept_name - ) concept_hierarchy on ar1.stratum_1 = CAST(concept_hierarchy.concept_id as VARCHAR), - (select count_value from @results_database_schema.ACHILLES_results where analysis_id = 1) denom -order by ar1.count_value desc diff --git a/inst/sql/sql_server/export_v4/observation/sqlObservationValueDistribution.sql b/inst/sql/sql_server/export_v4/observation/sqlObservationValueDistribution.sql deleted file mode 100644 index 99cae1de..00000000 --- a/inst/sql/sql_server/export_v4/observation/sqlObservationValueDistribution.sql +++ /dev/null @@ -1,15 +0,0 @@ -select c1.concept_id as concept_id, - c2.concept_name as category, - ard1.min_value as min_value, - ard1.p10_value as P10_value, - ard1.p25_value as P25_value, - ard1.median_value as median_value, - ard1.p75_value as P75_value, - ard1.p90_value as P90_value, - ard1.max_value as max_value -from @results_database_schema.ACHILLES_results_dist ard1 - inner join @vocab_database_schema.concept c1 on ard1.stratum_1 = CAST(c1.concept_id as VARCHAR) - inner join @vocab_database_schema.concept c2 on ard1.stratum_2 = CAST(c2.concept_id as VARCHAR) -where ard1.analysis_id = 
815 -and ard1.count_value > 0 - diff --git a/inst/sql/sql_server/export_v4/observation/sqlObservationsByType.sql b/inst/sql/sql_server/export_v4/observation/sqlObservationsByType.sql deleted file mode 100644 index 7099d0fc..00000000 --- a/inst/sql/sql_server/export_v4/observation/sqlObservationsByType.sql +++ /dev/null @@ -1,9 +0,0 @@ -select c1.concept_id as OBSERVATION_CONCEPT_ID, - c1.concept_name as OBSERVATION_CONCEPT_NAME, - c2.concept_id as CONCEPT_ID, - c2.concept_name as CONCEPT_NAME, - ar1.count_value as COUNT_VALUE -from @results_database_schema.ACHILLES_results ar1 - inner join @vocab_database_schema.concept c1 on ar1.stratum_1 = CAST(c1.concept_id as VARCHAR) - inner join @vocab_database_schema.concept c2 on ar1.stratum_2 = CAST(c2.concept_id as VARCHAR) -where ar1.analysis_id = 805 \ No newline at end of file diff --git a/inst/sql/sql_server/export_v4/observation/sqlPrevalenceByGenderAgeYear.sql b/inst/sql/sql_server/export_v4/observation/sqlPrevalenceByGenderAgeYear.sql deleted file mode 100644 index 0de8ae1e..00000000 --- a/inst/sql/sql_server/export_v4/observation/sqlPrevalenceByGenderAgeYear.sql +++ /dev/null @@ -1,35 +0,0 @@ -SELECT c1.concept_id AS concept_id, - c1.concept_name as concept_name, - cast(num_stratum_4 * 10 AS VARCHAR) + '-' + cast((num_stratum_4 + 1) * 10 - 1 AS VARCHAR) AS trellis_name, --age decile - c2.concept_name AS series_name, --gender - num_stratum_2 AS x_calendar_year, -- calendar year, note, there could be blanks - ROUND(1000 * (1.0 * num_count_value / denom_count_value), 5) AS y_prevalence_1000pp --prevalence, per 1000 persons -FROM ( - SELECT CAST(num.stratum_1 AS INT) AS num_stratum_1, - CAST(num.stratum_2 AS INT) AS num_stratum_2, - CAST(num.stratum_3 AS INT) AS num_stratum_3, - CAST(num.stratum_4 AS INT) AS num_stratum_4, - num.count_value AS num_count_value, - denom.count_value AS denom_count_value - FROM ( - SELECT * - FROM @results_database_schema.ACHILLES_results - WHERE analysis_id = 804 - AND stratum_3 IN ('8507', '8532') - ) num - INNER JOIN ( - SELECT * - FROM @results_database_schema.ACHILLES_results - WHERE analysis_id = 116 - AND stratum_2 IN ('8507', '8532') - ) denom - ON num.stratum_2 = denom.stratum_1 - AND num.stratum_3 = denom.stratum_2 - AND num.stratum_4 = denom.stratum_3 - ) tmp -INNER JOIN @vocab_database_schema.concept c1 - ON num_stratum_1 = c1.concept_id -INNER JOIN @vocab_database_schema.concept c2 - ON num_stratum_3 = c2.concept_id -ORDER BY c1.concept_id, - num_stratum_2 diff --git a/inst/sql/sql_server/export_v4/observation/sqlPrevalenceByMonth.sql b/inst/sql/sql_server/export_v4/observation/sqlPrevalenceByMonth.sql deleted file mode 100644 index 7a5948f0..00000000 --- a/inst/sql/sql_server/export_v4/observation/sqlPrevalenceByMonth.sql +++ /dev/null @@ -1,10 +0,0 @@ -select c1.concept_id as CONCEPT_ID, --all rows for all concepts, but you may split by conceptid - c1.concept_name as CONCEPT_NAME, - num.stratum_2 as X_CALENDAR_MONTH, -- calendar year, note, there could be blanks - round(1000*(1.0*num.count_value/denom.count_value),5) as Y_PREVALENCE_1000PP --prevalence, per 1000 persons -from - (select * from @results_database_schema.ACHILLES_results where analysis_id = 802) num - inner join - (select * from @results_database_schema.ACHILLES_results where analysis_id = 117) denom on num.stratum_2 = denom.stratum_1 --calendar year - inner join @vocab_database_schema.concept c1 on num.stratum_1 = CAST(c1.concept_id as VARCHAR) -ORDER BY CAST(num.stratum_2 as INT) diff --git 
a/inst/sql/sql_server/export_v4/observation/sqlRecordsByUnit.sql b/inst/sql/sql_server/export_v4/observation/sqlRecordsByUnit.sql deleted file mode 100644 index 308a0562..00000000 --- a/inst/sql/sql_server/export_v4/observation/sqlRecordsByUnit.sql +++ /dev/null @@ -1,9 +0,0 @@ -select c1.concept_id as observation_concept_id, - c1.concept_name as observation_concept_name, - c2.concept_id as concept_id, - c2.concept_name as concept_name, - ar1.count_value as count_value -from @results_database_schema.ACHILLES_results ar1 - inner join @vocab_database_schema.concept c1 on ar1.stratum_1 = CAST(c1.concept_id as VARCHAR) - inner join @vocab_database_schema.concept c2 on ar1.stratum_2 = CAST(c2.concept_id as VARCHAR) -where ar1.analysis_id = 807 \ No newline at end of file diff --git a/inst/sql/sql_server/export_v4/observation/sqlUpperLimitDistribution.sql b/inst/sql/sql_server/export_v4/observation/sqlUpperLimitDistribution.sql deleted file mode 100644 index bdc96618..00000000 --- a/inst/sql/sql_server/export_v4/observation/sqlUpperLimitDistribution.sql +++ /dev/null @@ -1,14 +0,0 @@ -select c1.concept_id as concept_id, - c2.concept_name as category, - ard1.min_value as min_value, - ard1.p10_value as P10_value, - ard1.p25_value as P25_value, - ard1.median_value as median_value, - ard1.p75_value as P75_value, - ard1.p90_value as P90_value, - ard1.max_value as max_value -from @results_database_schema.ACHILLES_results_dist ard1 - inner join @vocab_database_schema.concept c1 on ard1.stratum_1 = CAST(c1.concept_id as VARCHAR) - inner join @vocab_database_schema.concept c2 on ard1.stratum_2 = CAST(c2.concept_id as VARCHAR) -where ard1.analysis_id = 817 -and ard1.count_value > 0 diff --git a/inst/sql/sql_server/export_v4/observation/sqlValuesRelativeToNorm.sql b/inst/sql/sql_server/export_v4/observation/sqlValuesRelativeToNorm.sql deleted file mode 100644 index 6901d736..00000000 --- a/inst/sql/sql_server/export_v4/observation/sqlValuesRelativeToNorm.sql +++ /dev/null @@ -1,9 +0,0 @@ -select c1.concept_id as observation_concept_id, - c1.concept_name as observation_concept_name, - c2.concept_id as concept_id, - c2.concept_name + ': ' + ar1.stratum_3 as concept_name, - ar1.count_value as count_value -from @results_database_schema.ACHILLES_results ar1 - inner join @vocab_database_schema.concept c1 on ar1.stratum_1 = CAST(c1.concept_id as VARCHAR) - inner join @vocab_database_schema.concept c2 on ar1.stratum_2 = CAST(c2.concept_id as VARCHAR) -where ar1.analysis_id = 818 \ No newline at end of file diff --git a/inst/sql/sql_server/export_v4/observationperiod/ageatfirst.sql b/inst/sql/sql_server/export_v4/observationperiod/ageatfirst.sql deleted file mode 100644 index c2dd70a6..00000000 --- a/inst/sql/sql_server/export_v4/observationperiod/ageatfirst.sql +++ /dev/null @@ -1,11 +0,0 @@ -select cast(ar1.stratum_1 as int) as interval_index, - ar1.count_value as count_value, - round(1.0*ar1.count_value / denom.count_value,5) as percent_value -from -( - select * from @results_database_schema.ACHILLES_results where analysis_id = 101 -) ar1, -( - select count_value from @results_database_schema.ACHILLES_results where analysis_id = 1 -) denom -order by cast(ar1.stratum_1 as int) asc \ No newline at end of file diff --git a/inst/sql/sql_server/export_v4/observationperiod/agebygender.sql b/inst/sql/sql_server/export_v4/observationperiod/agebygender.sql deleted file mode 100644 index ece06e11..00000000 --- a/inst/sql/sql_server/export_v4/observationperiod/agebygender.sql +++ /dev/null @@ -1,11 +0,0 @@ -select 
c1.concept_name as Category, - ard1.min_value as min_value, - ard1.p10_value as p10_value, - ard1.p25_value as p25_value, - ard1.median_value as median_value, - ard1.p75_value as p75_value, - ard1.p90_value as p90_value, - ard1.max_value as max_value -from @results_database_schema.ACHILLES_results_dist ard1 -inner join @vocab_database_schema.concept c1 on CAST(ard1.stratum_1 AS INT) = c1.concept_id -where ard1.analysis_id = 104 \ No newline at end of file diff --git a/inst/sql/sql_server/export_v4/observationperiod/cumulativeduration.sql b/inst/sql/sql_server/export_v4/observationperiod/cumulativeduration.sql deleted file mode 100644 index 7542e49c..00000000 --- a/inst/sql/sql_server/export_v4/observationperiod/cumulativeduration.sql +++ /dev/null @@ -1,13 +0,0 @@ -select 'Length of observation' as series_name, - cast(ar1.stratum_1 as int)*30 as x_length_of_observation, - round(1.0*sum(ar2.count_value) / denom.count_value,5) as y_percent_persons -from (select * from @results_database_schema.ACHILLES_results where analysis_id = 108) ar1 -inner join -( - select * from @results_database_schema.ACHILLES_results where analysis_id = 108 -) ar2 on ar1.analysis_id = ar2.analysis_id and cast(ar1.stratum_1 as int) <= cast(ar2.stratum_1 as int), -( - select count_value from @results_database_schema.ACHILLES_results where analysis_id = 1 -) denom -group by cast(ar1.stratum_1 as int)*30, denom.count_value -order by cast(ar1.stratum_1 as int)*30 asc diff --git a/inst/sql/sql_server/export_v4/observationperiod/observationlength_data.sql b/inst/sql/sql_server/export_v4/observationperiod/observationlength_data.sql deleted file mode 100644 index 24644fc7..00000000 --- a/inst/sql/sql_server/export_v4/observationperiod/observationlength_data.sql +++ /dev/null @@ -1,10 +0,0 @@ -select cast(ar1.stratum_1 as int) as interval_index, - ar1.count_value as count_value, - round(1.0*ar1.count_value / denom.count_value,5) as percent_value -from @results_database_schema.ACHILLES_analysis aa1 -inner join @results_database_schema.ACHILLES_results ar1 on aa1.analysis_id = ar1.analysis_id, -( - select count_value from @results_database_schema.ACHILLES_results where analysis_id = 1 -) denom -where aa1.analysis_id = 108 -order by cast(ar1.stratum_1 as int) asc \ No newline at end of file diff --git a/inst/sql/sql_server/export_v4/observationperiod/observationlengthbyage.sql b/inst/sql/sql_server/export_v4/observationperiod/observationlengthbyage.sql deleted file mode 100644 index 6148dd05..00000000 --- a/inst/sql/sql_server/export_v4/observationperiod/observationlengthbyage.sql +++ /dev/null @@ -1,11 +0,0 @@ - select cast(cast(ard1.stratum_1 as int)*10 as varchar) + '-' + cast((cast(ard1.stratum_1 as int)+1)*10-1 as varchar) as category, - ard1.min_value as min_value, - ard1.p10_value as p10_value, - ard1.p25_value as p25_value, - ard1.median_value as median_value, - ard1.p75_value as p75_value, - ard1.p90_value as p90_value, - ard1.max_value as max_value -from @results_database_schema.ACHILLES_results_dist ard1 -where ard1.analysis_id = 107 -order by cast(ard1.stratum_1 as int) asc \ No newline at end of file diff --git a/inst/sql/sql_server/export_v4/observationperiod/observationlengthbygender.sql b/inst/sql/sql_server/export_v4/observationperiod/observationlengthbygender.sql deleted file mode 100644 index 19e64221..00000000 --- a/inst/sql/sql_server/export_v4/observationperiod/observationlengthbygender.sql +++ /dev/null @@ -1,11 +0,0 @@ -select c1.concept_name as category, - ard1.min_value as min_value, - ard1.p10_value as 
p10_value, - ard1.p25_value as p25_value, - ard1.median_value as median_value, - ard1.p75_value as p75_value, - ard1.p90_value as p90_value, - ard1.max_value as max_value -from @results_database_schema.ACHILLES_results_dist ard1 -inner join @vocab_database_schema.concept c1 on CAST(ard1.stratum_1 AS INT) = c1.concept_id -where ard1.analysis_id = 106 \ No newline at end of file diff --git a/inst/sql/sql_server/export_v4/observationperiod/observedbymonth.sql b/inst/sql/sql_server/export_v4/observationperiod/observedbymonth.sql deleted file mode 100644 index 84b3a8f5..00000000 --- a/inst/sql/sql_server/export_v4/observationperiod/observedbymonth.sql +++ /dev/null @@ -1,7 +0,0 @@ -select cast(ar1.stratum_1 as int) as month_year, - ar1.count_value as count_value, - round(1.0*ar1.count_value / denom.count_value,5) as percent_value -from (select * from @results_database_schema.ACHILLES_results where analysis_id = 110) ar1, - (select count_value from @results_database_schema.ACHILLES_results where analysis_id = 1) denom -order by ar1.stratum_1 asc - \ No newline at end of file diff --git a/inst/sql/sql_server/export_v4/observationperiod/observedbyyear_data.sql b/inst/sql/sql_server/export_v4/observationperiod/observedbyyear_data.sql deleted file mode 100644 index b3552be9..00000000 --- a/inst/sql/sql_server/export_v4/observationperiod/observedbyyear_data.sql +++ /dev/null @@ -1,15 +0,0 @@ -select cast(ar1.stratum_1 as int) - MinValue.MinValue as interval_index, - ar1.count_value as count_value, - round(1.0*ar1.count_value / denom.count_value,5) as percent_value -from -( - select * from @results_database_schema.ACHILLES_results where analysis_id = 109 -) ar1, -( - select min(cast(stratum_1 as int)) as MinValue - from @results_database_schema.ACHILLES_results where analysis_id = 109 -) MinValue, -( - select count_value from @results_database_schema.ACHILLES_results where analysis_id = 1 -) denom -order by ar1.stratum_1 asc \ No newline at end of file diff --git a/inst/sql/sql_server/export_v4/person/ethnicity.sql b/inst/sql/sql_server/export_v4/person/ethnicity.sql deleted file mode 100644 index 046d18c8..00000000 --- a/inst/sql/sql_server/export_v4/person/ethnicity.sql +++ /dev/null @@ -1,8 +0,0 @@ -select c1.concept_id as concept_id, - c1.concept_name as concept_name, - ar1.count_value as count_value -from @results_database_schema.ACHILLES_results ar1 - inner join - @vocab_database_schema.concept c1 - on CAST(ar1.stratum_1 AS INT) = c1.concept_id -where ar1.analysis_id = 5 diff --git a/inst/sql/sql_server/export_v4/person/gender.sql b/inst/sql/sql_server/export_v4/person/gender.sql deleted file mode 100644 index 63dec7d4..00000000 --- a/inst/sql/sql_server/export_v4/person/gender.sql +++ /dev/null @@ -1,9 +0,0 @@ -select c1.concept_id as concept_id, - c1.concept_name as concept_name, - ar1.count_value as count_value -from @results_database_schema.ACHILLES_results ar1 - inner join - @vocab_database_schema.concept c1 - on CAST(ar1.stratum_1 AS INT) = c1.concept_id -where ar1.analysis_id = 2 -and c1.concept_id in (8507, 8532) \ No newline at end of file diff --git a/inst/sql/sql_server/export_v4/person/race.sql b/inst/sql/sql_server/export_v4/person/race.sql deleted file mode 100644 index b5828733..00000000 --- a/inst/sql/sql_server/export_v4/person/race.sql +++ /dev/null @@ -1,8 +0,0 @@ -select c1.concept_id as concept_id, - c1.concept_name as concept_name, - ar1.count_value as count_value -from @results_database_schema.ACHILLES_results ar1 - inner join - @vocab_database_schema.concept c1 - on 
CAST(ar1.stratum_1 AS INT) = c1.concept_id -where ar1.analysis_id = 4 diff --git a/inst/sql/sql_server/export_v4/person/yearofbirth_data.sql b/inst/sql/sql_server/export_v4/person/yearofbirth_data.sql deleted file mode 100644 index c70069ee..00000000 --- a/inst/sql/sql_server/export_v4/person/yearofbirth_data.sql +++ /dev/null @@ -1,7 +0,0 @@ -select cast(ar1.stratum_1 as int) - MinValue.MinValue as interval_index, - ar1.count_value as count_value, - round(1.0*ar1.count_value / denom.count_value,5) as percent_value -from (select * from @results_database_schema.ACHILLES_results where analysis_id = 3) ar1, - (select min(cast(stratum_1 as int)) as MinValue from @results_database_schema.ACHILLES_results where analysis_id = 3) MinValue, - (select count_value from @results_database_schema.ACHILLES_results where analysis_id = 1) denom -order by ar1.stratum_1 asc diff --git a/inst/sql/sql_server/export_v4/procedure/sqlAgeAtFirstOccurrence.sql b/inst/sql/sql_server/export_v4/procedure/sqlAgeAtFirstOccurrence.sql deleted file mode 100644 index b76fc86c..00000000 --- a/inst/sql/sql_server/export_v4/procedure/sqlAgeAtFirstOccurrence.sql +++ /dev/null @@ -1,17 +0,0 @@ - select c1.concept_id as concept_id, - c2.concept_name as category, - ard1.min_value as min_value, - ard1.p10_value as p10_value, - ard1.p25_value as p25_value, - ard1.median_value as median_value, - ard1.p75_value as p75_value, - ard1.p90_value as p90_value, - ard1.max_value as max_value -from @results_database_schema.ACHILLES_results_dist ard1 - inner join - @vocab_database_schema.concept c1 - on CAST(ard1.stratum_1 AS INT) = c1.concept_id - inner join - @vocab_database_schema.concept c2 - on CAST(ard1.stratum_2 AS INT) = c2.concept_id -where ard1.analysis_id = 606 \ No newline at end of file diff --git a/inst/sql/sql_server/export_v4/procedure/sqlPrevalenceByGenderAgeYear.sql b/inst/sql/sql_server/export_v4/procedure/sqlPrevalenceByGenderAgeYear.sql deleted file mode 100644 index 46a6c5df..00000000 --- a/inst/sql/sql_server/export_v4/procedure/sqlPrevalenceByGenderAgeYear.sql +++ /dev/null @@ -1,35 +0,0 @@ -SELECT c1.concept_id AS concept_id, - c1.concept_name as concept_name, - cast(num_stratum_4 * 10 AS VARCHAR) + '-' + cast((num_stratum_4 + 1) * 10 - 1 AS VARCHAR) AS trellis_name, --age decile - c2.concept_name AS series_name, --gender - num_stratum_2 AS x_calendar_year, -- calendar year, note, there could be blanks - ROUND(1000 * (1.0 * num_count_value / denom_count_value), 5) AS y_prevalence_1000pp --prevalence, per 1000 persons -FROM ( - SELECT CAST(num.stratum_1 AS INT) AS num_stratum_1, - CAST(num.stratum_2 AS INT) AS num_stratum_2, - CAST(num.stratum_3 AS INT) AS num_stratum_3, - CAST(num.stratum_4 AS INT) AS num_stratum_4, - num.count_value AS num_count_value, - denom.count_value AS denom_count_value - FROM ( - SELECT * - FROM @results_database_schema.ACHILLES_results - WHERE analysis_id = 604 - AND stratum_3 IN ('8507', '8532') - ) num - INNER JOIN ( - SELECT * - FROM @results_database_schema.ACHILLES_results - WHERE analysis_id = 116 - AND stratum_2 IN ('8507', '8532') - ) denom - ON num.stratum_2 = denom.stratum_1 - AND num.stratum_3 = denom.stratum_2 - AND num.stratum_4 = denom.stratum_3 - ) tmp -INNER JOIN @vocab_database_schema.concept c1 - ON num_stratum_1 = c1.concept_id -INNER JOIN @vocab_database_schema.concept c2 - ON num_stratum_3 = c2.concept_id -ORDER BY c1.concept_id, - num_stratum_2 \ No newline at end of file diff --git a/inst/sql/sql_server/export_v4/procedure/sqlPrevalenceByMonth.sql 
b/inst/sql/sql_server/export_v4/procedure/sqlPrevalenceByMonth.sql deleted file mode 100644 index b32de147..00000000 --- a/inst/sql/sql_server/export_v4/procedure/sqlPrevalenceByMonth.sql +++ /dev/null @@ -1,11 +0,0 @@ - select c1.concept_id as concept_id, - c1.concept_name as concept_name, - num.stratum_2 as x_calendar_month, -- calendar year, note, there could be blanks - round(1000*(1.0*num.count_value/denom.count_value),5) as y_prevalence_1000pp --prevalence, per 1000 persons -from - (select * from @results_database_schema.ACHILLES_results where analysis_id = 602) num - inner join - (select * from @results_database_schema.ACHILLES_results where analysis_id = 117) - denom on num.stratum_2 = denom.stratum_1 --calendar year - inner join @vocab_database_schema.concept c1 on CAST(num.stratum_1 AS INT) = c1.concept_id -ORDER BY CAST(num.stratum_2 as INT) diff --git a/inst/sql/sql_server/export_v4/procedure/sqlProcedureTreemap.sql b/inst/sql/sql_server/export_v4/procedure/sqlProcedureTreemap.sql deleted file mode 100644 index f66c1ae4..00000000 --- a/inst/sql/sql_server/export_v4/procedure/sqlProcedureTreemap.sql +++ /dev/null @@ -1,122 +0,0 @@ -select concept_hierarchy.concept_id, - isNull(concept_hierarchy.level4_concept_name,'NA') - + '||' + isNull(concept_hierarchy.level3_concept_name,'NA') - + '||' + isNull(concept_hierarchy.level2_concept_name,'NA') - + '||' + isNull(concept_hierarchy.proc_concept_name,'NA') concept_path, - ar1.count_value as num_persons, - 1.0*ar1.count_value / denom.count_value as percent_persons, - 1.0*ar2.count_value / ar1.count_value as records_per_person -from (select * from @results_database_schema.ACHILLES_results where analysis_id = 600) ar1 - inner join - (select * from @results_database_schema.ACHILLES_results where analysis_id = 601) ar2 - on ar1.stratum_1 = ar2.stratum_1 - inner join - ( - select procs.concept_id, - procs.proc_concept_name, - max(proc_hierarchy.os3_concept_name) as level2_concept_name, - max(proc_hierarchy.os2_concept_name) as level3_concept_name, - max(proc_hierarchy.os1_concept_name) as level4_concept_name - from - ( - select c1.concept_id, - v1.vocabulary_name + ' ' + c1.concept_code + ': ' + c1.concept_name as proc_concept_name - from @vocab_database_schema.concept c1 - inner join @vocab_database_schema.vocabulary v1 - on c1.vocabulary_id = v1.vocabulary_id - where ( - c1.vocabulary_id in (3,4,5) - or (c1.vocabulary_id = 1 and c1.concept_class = 'Procedure') - ) - ) procs - - left join - (select ca0.DESCENDANT_CONCEPT_ID, max(ca0.ancestor_concept_id) as ancestor_concept_id - from @vocab_database_schema.concept_ancestor ca0 - inner join - (select distinct c2.concept_id as os3_concept_id - from @vocab_database_schema.concept_ancestor ca1 - inner join - @vocab_database_schema.concept c1 - on ca1.DESCENDANT_CONCEPT_ID = c1.concept_id - inner join - @vocab_database_schema.concept_ancestor ca2 - on c1.concept_id = ca2.ANCESTOR_CONCEPT_ID - inner join - @vocab_database_schema.concept c2 - on ca2.DESCENDANT_CONCEPT_ID = c2.concept_id - where ca1.ancestor_concept_id = 4040390 - and ca1.Min_LEVELS_OF_SEPARATION = 2 - and ca2.MIN_LEVELS_OF_SEPARATION = 1 - ) t1 - - on ca0.ANCESTOR_CONCEPT_ID = t1.os3_concept_id - - group by ca0.descendant_concept_id - - ) ca1 - on procs.concept_id = ca1.DESCENDANT_CONCEPT_ID - left join - ( - select proc_by_os1.os1_concept_name, - proc_by_os2.os2_concept_name, - proc_by_os3.os3_concept_name, - proc_by_os3.os3_concept_id - from - (select DESCENDANT_CONCEPT_ID as os1_concept_id, concept_name as os1_concept_name - 
from @vocab_database_schema.concept_ancestor ca1 - inner join - @vocab_database_schema.concept c1 - on ca1.DESCENDANT_CONCEPT_ID = c1.concept_id - where ancestor_concept_id = 4040390 - and Min_LEVELS_OF_SEPARATION = 1 - ) proc_by_os1 - - inner join - (select max(c1.CONCEPT_ID) as os1_concept_id, c2.concept_id as os2_concept_id, c2.concept_name as os2_concept_name - from @vocab_database_schema.concept_ancestor ca1 - inner join - @vocab_database_schema.concept c1 - on ca1.DESCENDANT_CONCEPT_ID = c1.concept_id - inner join - @vocab_database_schema.concept_ancestor ca2 - on c1.concept_id = ca2.ANCESTOR_CONCEPT_ID - inner join - @vocab_database_schema.concept c2 - on ca2.DESCENDANT_CONCEPT_ID = c2.concept_id - where ca1.ancestor_concept_id = 4040390 - and ca1.Min_LEVELS_OF_SEPARATION = 1 - and ca2.MIN_LEVELS_OF_SEPARATION = 1 - group by c2.concept_id, c2.concept_name - ) proc_by_os2 - on proc_by_os1.os1_concept_id = proc_by_os2.os1_concept_id - - inner join - (select max(c1.CONCEPT_ID) as os2_concept_id, c2.concept_id as os3_concept_id, c2.concept_name as os3_concept_name - from @vocab_database_schema.concept_ancestor ca1 - inner join - @vocab_database_schema.concept c1 - on ca1.DESCENDANT_CONCEPT_ID = c1.concept_id - inner join - @vocab_database_schema.concept_ancestor ca2 - on c1.concept_id = ca2.ANCESTOR_CONCEPT_ID - inner join - @vocab_database_schema.concept c2 - on ca2.DESCENDANT_CONCEPT_ID = c2.concept_id - where ca1.ancestor_concept_id = 4040390 - and ca1.Min_LEVELS_OF_SEPARATION = 2 - and ca2.MIN_LEVELS_OF_SEPARATION = 1 - group by c2.concept_id, c2.concept_name - ) proc_by_os3 - on proc_by_os2.os2_concept_id = proc_by_os3.os2_concept_id - ) proc_hierarchy - on ca1.ancestor_concept_id = proc_hierarchy.os3_concept_id - group by procs.concept_id, - procs.proc_concept_name - - ) concept_hierarchy - on CAST(ar1.stratum_1 AS INT) = concept_hierarchy.concept_id - , - (select count_value from @results_database_schema.ACHILLES_results where analysis_id = 1) denom - -order by ar1.count_value desc diff --git a/inst/sql/sql_server/export_v4/procedure/sqlProceduresByType.sql b/inst/sql/sql_server/export_v4/procedure/sqlProceduresByType.sql deleted file mode 100644 index 05916eb6..00000000 --- a/inst/sql/sql_server/export_v4/procedure/sqlProceduresByType.sql +++ /dev/null @@ -1,9 +0,0 @@ -select c1.concept_id as procedure_concept_id, - c1.concept_name as procedure_concept_name, - c2.concept_id as concept_id, - c2.concept_name as concept_name, - ar1.count_value as count_value -from @results_database_schema.ACHILLES_results ar1 - inner join @vocab_database_schema.concept c1 on CAST(ar1.stratum_1 AS INT) = c1.concept_id - inner join @vocab_database_schema.concept c2 on CAST(ar1.stratum_2 AS INT) = c2.concept_id -where ar1.analysis_id = 605 \ No newline at end of file diff --git a/inst/sql/sql_server/export_v4/visit/sqlAgeAtFirstOccurrence.sql b/inst/sql/sql_server/export_v4/visit/sqlAgeAtFirstOccurrence.sql deleted file mode 100644 index a990aa58..00000000 --- a/inst/sql/sql_server/export_v4/visit/sqlAgeAtFirstOccurrence.sql +++ /dev/null @@ -1,13 +0,0 @@ -select c1.concept_id as concept_id, - c2.concept_name as category, - ard1.min_value as min_value, - ard1.p10_value as p10_value, - ard1.p25_value as p25_value, - ard1.median_value as median_value, - ard1.p75_value as p75_value, - ard1.p90_value as p90_value, - ard1.max_value as max_value -from @results_database_schema.ACHILLES_results_dist ard1 - inner join @vocab_database_schema.concept c1 on CAST(ard1.stratum_1 AS INT) = c1.concept_id - inner join 
@vocab_database_schema.concept c2 on CAST(ard1.stratum_2 AS INT) = c2.concept_id -where ard1.analysis_id = 206 diff --git a/inst/sql/sql_server/export_v4/visit/sqlPrevalenceByGenderAgeYear.sql b/inst/sql/sql_server/export_v4/visit/sqlPrevalenceByGenderAgeYear.sql deleted file mode 100644 index 7e9945ac..00000000 --- a/inst/sql/sql_server/export_v4/visit/sqlPrevalenceByGenderAgeYear.sql +++ /dev/null @@ -1,36 +0,0 @@ -SELECT c1.concept_id AS concept_id, - c1.concept_name as concept_name, - cast(num_stratum_4 * 10 AS VARCHAR) + '-' + cast((num_stratum_4 + 1) * 10 - 1 AS VARCHAR) AS trellis_name, --age decile - c2.concept_name AS series_name, --gender - num_stratum_2 AS x_calendar_year, -- calendar year, note, there could be blanks - ROUND(1000 * (1.0 * num_count_value / denom_count_value), 5) AS y_prevalence_1000pp --prevalence, per 1000 persons -FROM ( - SELECT CAST(num.stratum_1 AS INT) AS num_stratum_1, - CAST(num.stratum_2 AS INT) AS num_stratum_2, - CAST(num.stratum_3 AS INT) AS num_stratum_3, - CAST(num.stratum_4 AS INT) AS num_stratum_4, - num.count_value AS num_count_value, - denom.count_value AS denom_count_value - FROM ( - SELECT * - FROM @results_database_schema.ACHILLES_results - WHERE analysis_id = 204 - AND stratum_3 IN ('8507', '8532') - ) num - INNER JOIN ( - SELECT * - FROM @results_database_schema.ACHILLES_results - WHERE analysis_id = 116 - AND stratum_2 IN ('8507', '8532') - ) denom - ON num.stratum_2 = denom.stratum_1 - AND num.stratum_3 = denom.stratum_2 - AND num.stratum_4 = denom.stratum_3 - ) tmp -INNER JOIN @vocab_database_schema.concept c1 - ON num_stratum_1 = c1.concept_id -INNER JOIN @vocab_database_schema.concept c2 - ON num_stratum_3 = c2.concept_id -ORDER BY c1.concept_id, - num_stratum_2 - diff --git a/inst/sql/sql_server/export_v4/visit/sqlPrevalenceByMonth.sql b/inst/sql/sql_server/export_v4/visit/sqlPrevalenceByMonth.sql deleted file mode 100644 index dabc81ff..00000000 --- a/inst/sql/sql_server/export_v4/visit/sqlPrevalenceByMonth.sql +++ /dev/null @@ -1,10 +0,0 @@ -select c1.concept_id as concept_id, --all rows for all concepts, but you may split by conceptid - c1.concept_name as concept_name, - num.stratum_2 as x_calendar_month, -- calendar year, note, there could be blanks - 1000*(1.0*num.count_value/denom.count_value) as y_prevalence_1000pp --prevalence, per 1000 persons -from - (select * from @results_database_schema.ACHILLES_results where analysis_id = 202) num - inner join - (select * from @results_database_schema.ACHILLES_results where analysis_id = 117) denom on num.stratum_2 = denom.stratum_1 --calendar year - inner join @vocab_database_schema.concept c1 on CAST(num.stratum_1 AS INT) = c1.concept_id -ORDER BY CAST(num.stratum_2 as INT) diff --git a/inst/sql/sql_server/export_v4/visit/sqlVisitDurationByType.sql b/inst/sql/sql_server/export_v4/visit/sqlVisitDurationByType.sql deleted file mode 100644 index ed89ba10..00000000 --- a/inst/sql/sql_server/export_v4/visit/sqlVisitDurationByType.sql +++ /dev/null @@ -1,13 +0,0 @@ -select c1.concept_id as concept_id, - 'Length of stay' as category, - ard1.min_value as min_value, - ard1.p10_value as p10_value, - ard1.p25_value as p25_value, - ard1.median_value as median_value, - ard1.p75_value as p75_value, - ard1.p90_value as p90_value, - ard1.max_value as max_value -from @results_database_schema.ACHILLES_results_dist ard1 - inner join - @vocab_database_schema.concept c1 on CAST(ard1.stratum_1 AS INT) = c1.concept_id -where ard1.analysis_id = 211 diff --git 
a/inst/sql/sql_server/export_v4/visit/sqlVisitTreemap.sql b/inst/sql/sql_server/export_v4/visit/sqlVisitTreemap.sql deleted file mode 100644 index 17c857f2..00000000 --- a/inst/sql/sql_server/export_v4/visit/sqlVisitTreemap.sql +++ /dev/null @@ -1,11 +0,0 @@ -select c1.concept_id, - c1.concept_name as concept_path, - ar1.count_value as num_persons, - 1.0*ar1.count_value / denom.count_value as percent_persons, - 1.0*ar2.count_value / ar1.count_value as records_per_person -from (select * from @results_database_schema.ACHILLES_results where analysis_id = 200) ar1 - inner join - (select * from @results_database_schema.ACHILLES_results where analysis_id = 201) ar2 on ar1.stratum_1 = ar2.stratum_1 - inner join @vocab_database_schema.concept c1 on CAST(ar1.stratum_1 AS INT) = c1.concept_id, - (select count_value from @results_database_schema.ACHILLES_results where analysis_id = 1) denom -order by ar1.count_value desc diff --git a/inst/sql/sql_server/export_v5/datadensity/conceptsperperson.sql b/inst/sql/sql_server/export_v5/datadensity/conceptsperperson.sql deleted file mode 100644 index b39918d8..00000000 --- a/inst/sql/sql_server/export_v5/datadensity/conceptsperperson.sql +++ /dev/null @@ -1,76 +0,0 @@ -select 'Condition occurrence' as Category, - ard1.min_value as min_value, - ard1.p10_value as p10_value, - ard1.p25_value as p25_value, - ard1.median_value as median_value, - ard1.p75_value as p75_value, - ard1.p90_value as p90_value, - ard1.max_value as max_value -from @results_database_schema.ACHILLES_results_dist ard1 -where ard1.analysis_id = 403 - -union - -select 'Procedure occurrence' as Category, - ard1.min_value as min_value, - ard1.p10_value as p10_value, - ard1.p25_value as p25_value, - ard1.median_value as median_value, - ard1.p75_value as p75_value, - ard1.p90_value as p90_value, - ard1.max_value as max_value -from @results_database_schema.ACHILLES_results_dist ard1 -where ard1.analysis_id = 603 - -union - -select 'Drug exposure' as Category, - ard1.min_value as min_value, - ard1.p10_value as p10_value, - ard1.p25_value as p25_value, - ard1.median_value as median_value, - ard1.p75_value as p75_value, - ard1.p90_value as p90_value, - ard1.max_value as max_value -from @results_database_schema.ACHILLES_results_dist ard1 -where ard1.analysis_id = 703 - -union - -select 'Observation' as Category, - ard1.min_value as min_value, - ard1.p10_value as p10_value, - ard1.p25_value as p25_value, - ard1.median_value as median_value, - ard1.p75_value as p75_value, - ard1.p90_value as p90_value, - ard1.max_value as max_value -from @results_database_schema.ACHILLES_results_dist ard1 -where ard1.analysis_id = 803 - -union - -select 'Drug era' as Category, - ard1.min_value as min_value, - ard1.p10_value as p10_value, - ard1.p25_value as p25_value, - ard1.median_value as median_value, - ard1.p75_value as p75_value, - ard1.p90_value as p90_value, - ard1.max_value as max_value -from @results_database_schema.ACHILLES_results_dist ard1 -where ard1.analysis_id = 903 - -union - -select 'Condition era' as Category, - ard1.min_value as min_value, - ard1.p10_value as p10_value, - ard1.p25_value as p25_value, - ard1.median_value as median_value, - ard1.p75_value as p75_value, - ard1.p90_value as p90_value, - ard1.max_value as max_value -from @results_database_schema.ACHILLES_results_dist ard1 -where ard1.analysis_id = 1003 - diff --git a/inst/sql/sql_server/export_v5/observationperiod/observationlength_stats.sql b/inst/sql/sql_server/export_v5/observationperiod/observationlength_stats.sql deleted file mode 
100644 index 52f311c9..00000000 --- a/inst/sql/sql_server/export_v5/observationperiod/observationlength_stats.sql +++ /dev/null @@ -1,9 +0,0 @@ -select min(cast(ar1.stratum_1 as int)) * 30 as min_value, - max(cast(ar1.stratum_1 as int)) * 30 as max_value, - 30 as interval_size -from @results_database_schema.ACHILLES_analysis aa1 -inner join @results_database_schema.ACHILLES_results ar1 on aa1.analysis_id = ar1.analysis_id, -( - select count_value from @results_database_schema.ACHILLES_results where analysis_id = 1 -) denom -where aa1.analysis_id = 108 \ No newline at end of file diff --git a/inst/sql/sql_server/export_v5/observationperiod/observedbyyear_stats.sql b/inst/sql/sql_server/export_v5/observationperiod/observedbyyear_stats.sql deleted file mode 100644 index 048971af..00000000 --- a/inst/sql/sql_server/export_v5/observationperiod/observedbyyear_stats.sql +++ /dev/null @@ -1,5 +0,0 @@ -select min(cast(ar1.stratum_1 as int)) as min_value, - max(cast(ar1.stratum_1 as int)) as max_value, - 1 as interval_size -from @results_database_schema.ACHILLES_results ar1 -where ar1.analysis_id = 109 \ No newline at end of file diff --git a/inst/sql/sql_server/export_v5/observationperiod/periodsperperson.sql b/inst/sql/sql_server/export_v5/observationperiod/periodsperperson.sql deleted file mode 100644 index 67d04894..00000000 --- a/inst/sql/sql_server/export_v5/observationperiod/periodsperperson.sql +++ /dev/null @@ -1,5 +0,0 @@ -select row_number() over (order by ar1.stratum_1) as concept_id, - ar1.stratum_1 as concept_name, - ar1.count_value as count_value -from @results_database_schema.ACHILLES_results ar1 -where ar1.analysis_id = 113 \ No newline at end of file diff --git a/inst/sql/sql_server/export_v5/person/population.sql b/inst/sql/sql_server/export_v5/person/population.sql deleted file mode 100644 index 62824ad4..00000000 --- a/inst/sql/sql_server/export_v5/person/population.sql +++ /dev/null @@ -1,19 +0,0 @@ -(select aa1.analysis_name as attribute_name, - ar1.stratum_1 as attribute_value -from @results_database_schema.ACHILLES_analysis aa1 -inner join -@results_database_schema.ACHILLES_results ar1 -on aa1.analysis_id = ar1.analysis_id -where aa1.analysis_id = 0 - -union - -select aa1.analysis_name as attribute_name, -cast(ar1.count_value as varchar) as attribute_value -from @results_database_schema.ACHILLES_analysis aa1 -inner join -@results_database_schema.ACHILLES_results ar1 -on aa1.analysis_id = ar1.analysis_id -where aa1.analysis_id = 1 -) -order by attribute_name desc diff --git a/inst/sql/sql_server/export_v5/person/yearofbirth_stats.sql b/inst/sql/sql_server/export_v5/person/yearofbirth_stats.sql deleted file mode 100644 index 63da3ee2..00000000 --- a/inst/sql/sql_server/export_v5/person/yearofbirth_stats.sql +++ /dev/null @@ -1,5 +0,0 @@ -select min(cast(ar1.stratum_1 as int)) as min_value, - max(cast(ar1.stratum_1 as int)) as max_value, - 1 as interval_size -from @results_database_schema.ACHILLES_results ar1 -where ar1.analysis_id = 3 diff --git a/inst/sql/sql_server/heels/merge_derived.sql b/inst/sql/sql_server/heels/merge_derived.sql new file mode 100755 index 00000000..d3266252 --- /dev/null +++ b/inst/sql/sql_server/heels/merge_derived.sql @@ -0,0 +1,15 @@ +IF OBJECT_ID('@schema@schemaDelim@destination', 'U') IS NOT NULL + DROP TABLE @schema@schemaDelim@destination; + +select + analysis_id, + stratum_1, + stratum_2, + statistic_value, + measure_id +into @schema@schemaDelim@destination +from +( + @derivedSqls +) Q +; \ No newline at end of file diff --git 
a/inst/sql/sql_server/heels/merge_heel_results.sql b/inst/sql/sql_server/heels/merge_heel_results.sql new file mode 100755 index 00000000..6ef74c9f --- /dev/null +++ b/inst/sql/sql_server/heels/merge_heel_results.sql @@ -0,0 +1,14 @@ +IF OBJECT_ID('@schema@schemaDelim@destination', 'U') IS NOT NULL + DROP TABLE @schema@schemaDelim@destination; + +select distinct + analysis_id, + achilles_heel_warning, + rule_id, + record_count +into @schema@schemaDelim@destination +from +( + @resultSqls +) Q +; \ No newline at end of file diff --git a/inst/sql/sql_server/heels/parallel/heel_results/rule_1.sql b/inst/sql/sql_server/heels/parallel/heel_results/rule_1.sql new file mode 100755 index 00000000..7052959a --- /dev/null +++ b/inst/sql/sql_server/heels/parallel/heel_results/rule_1.sql @@ -0,0 +1,69 @@ +--Some rules check conformance to the CDM model, other rules look at data quality + + +--ruleid 1 check for non-zero counts from checks of improper data (invalid ids, out-of-bound data, inconsistent dates) + +--HINT DISTRIBUTE_ON_KEY(analysis_id) +select + analysis_id, + ACHILLES_HEEL_warning, + rule_id, + record_count + into @scratchDatabaseSchema@schemaDelim@tempHeelPrefix_@heelName +from +( + SELECT DISTINCT or1.analysis_id, + CAST(CONCAT('ERROR: ', cast(or1.analysis_id as VARCHAR(10)), '-', oa1.analysis_name, '; count (n=', cast(or1.count_value as VARCHAR(19)), ') should not be > 0') AS VARCHAR(255)) AS ACHILLES_HEEL_warning, + 1 as rule_id, + or1.count_value as record_count + FROM @resultsDatabaseSchema.ACHILLES_results or1 + INNER JOIN @resultsDatabaseSchema.ACHILLES_analysis oa1 + ON or1.analysis_id = oa1.analysis_id + WHERE or1.analysis_id IN ( + 7, + 8, + 9, + 114, + 115, + 118, + 207, + 208, + 209, + 210, + 302, + 409, + 410, + 411, + 412, + 413, + 509, + --510, taken out from this umbrella rule and implemented separately + 609, + 610, + 612, + 613, + 709, + 710, + 711, + 712, + 713, + 809, + 810, + 812, + 813, + 814, + 908, + 909, + 910, + 1008, + 1009, + 1010, + 1415, + 1500, + 1501, + 1600, + 1601, + 1701 + ) --all explicit counts of data anamolies + AND or1.count_value > 0 + ) A; diff --git a/inst/sql/sql_server/heels/parallel/heel_results/rule_10.sql b/inst/sql/sql_server/heels/parallel/heel_results/rule_10.sql new file mode 100755 index 00000000..75541ced --- /dev/null +++ b/inst/sql/sql_server/heels/parallel/heel_results/rule_10.sql @@ -0,0 +1,27 @@ +--ruleid 10 place of service - 14 CMS place of service, 24 OMOP visit + +--HINT DISTRIBUTE_ON_KEY(analysis_id) +select + analysis_id, + ACHILLES_HEEL_warning, + rule_id, + record_count + into @scratchDatabaseSchema@schemaDelim@tempHeelPrefix_@heelName +from +( + SELECT or1.analysis_id, + CAST(CONCAT('ERROR: ', cast(or1.analysis_id as VARCHAR(10)), '-', oa1.analysis_name, '; ', cast(COUNT_BIG(DISTINCT stratum_1) AS VARCHAR(19)), ' concepts in data are not in correct vocabulary') AS VARCHAR(255)) AS ACHILLES_HEEL_warning, + 10 as rule_id, + COUNT_BIG(DISTINCT stratum_1) as record_count + FROM @resultsDatabaseSchema.ACHILLES_results or1 + INNER JOIN @resultsDatabaseSchema.ACHILLES_analysis oa1 + ON or1.analysis_id = oa1.analysis_id + INNER JOIN @cdmDatabaseSchema.concept c1 + ON or1.stratum_1 = CAST(c1.concept_id AS VARCHAR(19)) + WHERE or1.analysis_id IN (202) + AND or1.stratum_1 IS NOT NULL + AND c1.concept_id <> 0 + AND lower(c1.domain_id) NOT IN ('visit') + GROUP BY or1.analysis_id, + oa1.analysis_name +) A; diff --git a/inst/sql/sql_server/heels/parallel/heel_results/rule_11.sql 
b/inst/sql/sql_server/heels/parallel/heel_results/rule_11.sql new file mode 100755 index 00000000..f9df6a59 --- /dev/null +++ b/inst/sql/sql_server/heels/parallel/heel_results/rule_11.sql @@ -0,0 +1,27 @@ +--ruleid 11 CDM-conformance rule:specialty - 48 specialty + +--HINT DISTRIBUTE_ON_KEY(analysis_id) +select + analysis_id, + ACHILLES_HEEL_warning, + rule_id, + record_count + into @scratchDatabaseSchema@schemaDelim@tempHeelPrefix_@heelName +from +( + SELECT or1.analysis_id, + CAST(CONCAT('ERROR: ', cast(or1.analysis_id as VARCHAR(10)), '-', oa1.analysis_name, '; ', cast(COUNT_BIG(DISTINCT stratum_1) AS VARCHAR(19)), ' concepts in data are not in correct vocabulary (Specialty)') AS VARCHAR(255)) AS ACHILLES_HEEL_warning, + 11 as rule_id, + COUNT_BIG(DISTINCT stratum_1) as record_count + FROM @resultsDatabaseSchema.ACHILLES_results or1 + INNER JOIN @resultsDatabaseSchema.ACHILLES_analysis oa1 + ON or1.analysis_id = oa1.analysis_id + INNER JOIN @cdmDatabaseSchema.concept c1 + ON or1.stratum_1 = CAST(c1.concept_id AS VARCHAR(19)) + WHERE or1.analysis_id IN (301) + AND or1.stratum_1 IS NOT NULL + AND c1.concept_id <> 0 + AND lower(c1.domain_id) NOT IN ('provider specialty') + GROUP BY or1.analysis_id, + oa1.analysis_name +) A; diff --git a/inst/sql/sql_server/heels/parallel/heel_results/rule_12.sql b/inst/sql/sql_server/heels/parallel/heel_results/rule_12.sql new file mode 100755 index 00000000..9737ed42 --- /dev/null +++ b/inst/sql/sql_server/heels/parallel/heel_results/rule_12.sql @@ -0,0 +1,30 @@ +--ruleid 12 condition occurrence, era - 1 SNOMED + +--HINT DISTRIBUTE_ON_KEY(analysis_id) +select + analysis_id, + ACHILLES_HEEL_warning, + rule_id, + record_count + into @scratchDatabaseSchema@schemaDelim@tempHeelPrefix_@heelName +from +( + SELECT or1.analysis_id, + CAST(CONCAT('ERROR: ', cast(or1.analysis_id as VARCHAR(10)), '-', oa1.analysis_name, '; ', cast(COUNT_BIG(DISTINCT stratum_1) AS VARCHAR(19)), ' concepts in data are not in correct vocabulary') AS VARCHAR(255)) AS ACHILLES_HEEL_warning, + 12 as rule_id, + COUNT_BIG(DISTINCT stratum_1) as record_count + FROM @resultsDatabaseSchema.ACHILLES_results or1 + INNER JOIN @resultsDatabaseSchema.ACHILLES_analysis oa1 + ON or1.analysis_id = oa1.analysis_id + INNER JOIN @cdmDatabaseSchema.concept c1 + ON or1.stratum_1 = CAST(c1.concept_id AS VARCHAR(19)) + WHERE or1.analysis_id IN ( + 400, + 1000 + ) + AND or1.stratum_1 IS NOT NULL + AND c1.concept_id <> 0 + AND lower(c1.domain_id) NOT IN ('condition','condition/drug', 'condition/meas', 'condition/obs', 'condition/procedure') + GROUP BY or1.analysis_id, + oa1.analysis_name +) A; diff --git a/inst/sql/sql_server/heels/parallel/heel_results/rule_13.sql b/inst/sql/sql_server/heels/parallel/heel_results/rule_13.sql new file mode 100755 index 00000000..2387efe7 --- /dev/null +++ b/inst/sql/sql_server/heels/parallel/heel_results/rule_13.sql @@ -0,0 +1,30 @@ +--ruleid 13 drug exposure - 8 RxNorm + +--HINT DISTRIBUTE_ON_KEY(analysis_id) +select + analysis_id, + ACHILLES_HEEL_warning, + rule_id, + record_count + into @scratchDatabaseSchema@schemaDelim@tempHeelPrefix_@heelName +from +( + SELECT or1.analysis_id, + CAST(CONCAT('ERROR: ', cast(or1.analysis_id as VARCHAR(10)), '-', oa1.analysis_name, '; ', cast(COUNT_BIG(DISTINCT stratum_1) AS VARCHAR(19)), ' concepts in data are not in correct vocabulary') AS VARCHAR(255)) AS ACHILLES_HEEL_warning, + 13 as rule_id, + COUNT_BIG(DISTINCT stratum_1) as record_count + FROM @resultsDatabaseSchema.ACHILLES_results or1 + INNER JOIN 
@resultsDatabaseSchema.ACHILLES_analysis oa1 + ON or1.analysis_id = oa1.analysis_id + INNER JOIN @cdmDatabaseSchema.concept c1 + ON or1.stratum_1 = CAST(c1.concept_id AS VARCHAR(19)) + WHERE or1.analysis_id IN ( + 700, + 900 + ) + AND or1.stratum_1 IS NOT NULL + AND c1.concept_id <> 0 + AND lower(c1.domain_id) NOT IN ('drug','condition/drug', 'device/drug', 'drug/measurement', 'drug/obs', 'drug/procedure') + GROUP BY or1.analysis_id, + oa1.analysis_name +) A; diff --git a/inst/sql/sql_server/heels/parallel/heel_results/rule_14.sql b/inst/sql/sql_server/heels/parallel/heel_results/rule_14.sql new file mode 100755 index 00000000..0de7eb38 --- /dev/null +++ b/inst/sql/sql_server/heels/parallel/heel_results/rule_14.sql @@ -0,0 +1,27 @@ +--ruleid 14 procedure - 4 CPT4/5 HCPCS/3 ICD9P + +--HINT DISTRIBUTE_ON_KEY(analysis_id) +select + analysis_id, + ACHILLES_HEEL_warning, + rule_id, + record_count + into @scratchDatabaseSchema@schemaDelim@tempHeelPrefix_@heelName +from +( + SELECT or1.analysis_id, + CAST(CONCAT('ERROR: ', cast(or1.analysis_id as VARCHAR(10)), '-', oa1.analysis_name, '; ', cast(COUNT_BIG(DISTINCT stratum_1) AS VARCHAR(19)), ' concepts in data are not in correct vocabulary') AS VARCHAR(255)) AS ACHILLES_HEEL_warning, + 14 as rule_id, + COUNT_BIG(DISTINCT stratum_1) as record_count + FROM @resultsDatabaseSchema.ACHILLES_results or1 + INNER JOIN @resultsDatabaseSchema.ACHILLES_analysis oa1 + ON or1.analysis_id = oa1.analysis_id + INNER JOIN @cdmDatabaseSchema.concept c1 + ON or1.stratum_1 = CAST(c1.concept_id AS VARCHAR(19)) + WHERE or1.analysis_id IN (600) + AND or1.stratum_1 IS NOT NULL + AND c1.concept_id <> 0 + AND lower(c1.domain_id) NOT IN ('procedure','condition/procedure', 'device/procedure', 'drug/procedure', 'obs/procedure') + GROUP BY or1.analysis_id, + oa1.analysis_name +) A; diff --git a/inst/sql/sql_server/heels/parallel/heel_results/rule_17.sql b/inst/sql/sql_server/heels/parallel/heel_results/rule_17.sql new file mode 100755 index 00000000..02a4df15 --- /dev/null +++ b/inst/sql/sql_server/heels/parallel/heel_results/rule_17.sql @@ -0,0 +1,27 @@ +--ruleid 17 revenue code - 43 revenue code + +--HINT DISTRIBUTE_ON_KEY(analysis_id) +select + analysis_id, + ACHILLES_HEEL_warning, + rule_id, + record_count + into @scratchDatabaseSchema@schemaDelim@tempHeelPrefix_@heelName +from +( + SELECT or1.analysis_id, + CAST(CONCAT('ERROR: ', cast(or1.analysis_id as VARCHAR(10)), '-', oa1.analysis_name, '; ', cast(COUNT_BIG(DISTINCT stratum_1) AS VARCHAR(19)), ' concepts in data are not in correct vocabulary (revenue code)') AS VARCHAR(255)) AS ACHILLES_HEEL_warning, + 17 as rule_id, + COUNT_BIG(DISTINCT stratum_1) as record_count + FROM @resultsDatabaseSchema.ACHILLES_results or1 + INNER JOIN @resultsDatabaseSchema.ACHILLES_analysis oa1 + ON or1.analysis_id = oa1.analysis_id + INNER JOIN @cdmDatabaseSchema.concept c1 + ON or1.stratum_1 = CAST(c1.concept_id AS VARCHAR(19)) + WHERE or1.analysis_id IN (1610) + AND or1.stratum_1 IS NOT NULL + AND c1.concept_id <> 0 + AND lower(c1.domain_id) NOT IN ('revenue code') + GROUP BY or1.analysis_id, + oa1.analysis_name +) A; diff --git a/inst/sql/sql_server/heels/parallel/heel_results/rule_18.sql b/inst/sql/sql_server/heels/parallel/heel_results/rule_18.sql new file mode 100755 index 00000000..f9dabb54 --- /dev/null +++ b/inst/sql/sql_server/heels/parallel/heel_results/rule_18.sql @@ -0,0 +1,24 @@ +--ruleid 18 ERROR: year of birth in the future + +--HINT DISTRIBUTE_ON_KEY(analysis_id) +select + analysis_id, + ACHILLES_HEEL_warning, + rule_id, + 
record_count + into @scratchDatabaseSchema@schemaDelim@tempHeelPrefix_@heelName +from +( + SELECT DISTINCT or1.analysis_id, + CAST(CONCAT('ERROR: ', cast(or1.analysis_id as VARCHAR(10)), '-', oa1.analysis_name, '; should not have year of birth in the future, (n=', cast(sum(or1.count_value) as VARCHAR(19)), ')') AS VARCHAR(255)) AS ACHILLES_HEEL_warning, + 18 as rule_id, + sum(or1.count_value) as record_count + FROM @resultsDatabaseSchema.ACHILLES_results or1 + INNER JOIN @resultsDatabaseSchema.ACHILLES_analysis oa1 + ON or1.analysis_id = oa1.analysis_id + WHERE or1.analysis_id IN (3) + AND CAST(or1.stratum_1 AS INT) > year(getdate()) + AND or1.count_value > 0 + GROUP BY or1.analysis_id, + oa1.analysis_name +) A; diff --git a/inst/sql/sql_server/heels/parallel/heel_results/rule_19.sql b/inst/sql/sql_server/heels/parallel/heel_results/rule_19.sql new file mode 100755 index 00000000..04aed9b8 --- /dev/null +++ b/inst/sql/sql_server/heels/parallel/heel_results/rule_19.sql @@ -0,0 +1,24 @@ +--ruleid 19 WARNING: year of birth < 1800 + +--HINT DISTRIBUTE_ON_KEY(analysis_id) +select + analysis_id, + ACHILLES_HEEL_warning, + rule_id, + record_count + into @scratchDatabaseSchema@schemaDelim@tempHeelPrefix_@heelName +from +( + SELECT or1.analysis_id, + CAST(CONCAT('ERROR: ', cast(or1.analysis_id as VARCHAR(10)), '-', oa1.analysis_name, '; should not have year of birth < 1800, (n=', cast(sum(or1.count_value) as VARCHAR(19)), ')') AS VARCHAR(255)) AS ACHILLES_HEEL_warning, + 19 as rule_id, + sum(or1.count_value) as record_count + FROM @resultsDatabaseSchema.ACHILLES_results or1 + INNER JOIN @resultsDatabaseSchema.ACHILLES_analysis oa1 + ON or1.analysis_id = oa1.analysis_id + WHERE or1.analysis_id IN (3) + AND cAST(or1.stratum_1 AS INT) < 1800 + AND or1.count_value > 0 + GROUP BY or1.analysis_id, + oa1.analysis_name +) A; diff --git a/inst/sql/sql_server/heels/parallel/heel_results/rule_2.sql b/inst/sql/sql_server/heels/parallel/heel_results/rule_2.sql new file mode 100755 index 00000000..8ad66f9f --- /dev/null +++ b/inst/sql/sql_server/heels/parallel/heel_results/rule_2.sql @@ -0,0 +1,55 @@ +--ruleid 2 distributions where min should not be negative + +--HINT DISTRIBUTE_ON_KEY(analysis_id) +select + analysis_id, + ACHILLES_HEEL_warning, + rule_id, + record_count + into @scratchDatabaseSchema@schemaDelim@tempHeelPrefix_@heelName + from + ( + SELECT ord1.analysis_id, + CAST(CONCAT('ERROR: ', cast(ord1.analysis_id as VARCHAR(10)), ' - ', oa1.analysis_name, ' (count = ', cast(COUNT_BIG(ord1.min_value) as VARCHAR(19)), '); min value should not be negative') AS VARCHAR(255)) AS ACHILLES_HEEL_warning, + 2 as rule_id, + COUNT_BIG(ord1.min_value) as record_count + FROM @resultsDatabaseSchema.ACHILLES_results_dist ord1 + INNER JOIN @resultsDatabaseSchema.ACHILLES_analysis oa1 + ON ord1.analysis_id = oa1.analysis_id + WHERE ord1.analysis_id IN ( + 103, + 105, + 206, + 406, + 506, + 606, + 706, + 715, + 716, + 717, + 806, + 906, + 907, + 1006, + 1007, + 1502, + 1503, + 1504, + 1505, + 1506, + 1507, + 1508, + 1509, + 1510, + 1511, + 1602, + 1603, + 1604, + 1605, + 1606, + 1607, + 1608 + ) + AND ord1.min_value < 0 + GROUP BY ord1.analysis_id, oa1.analysis_name +) A; diff --git a/inst/sql/sql_server/heels/parallel/heel_results/rule_20.sql b/inst/sql/sql_server/heels/parallel/heel_results/rule_20.sql new file mode 100755 index 00000000..7d67ce0d --- /dev/null +++ b/inst/sql/sql_server/heels/parallel/heel_results/rule_20.sql @@ -0,0 +1,24 @@ +--ruleid 20 ERROR: age < 0 + +--HINT DISTRIBUTE_ON_KEY(analysis_id) +select + 
analysis_id, + ACHILLES_HEEL_warning, + rule_id, + record_count + into @scratchDatabaseSchema@schemaDelim@tempHeelPrefix_@heelName +from +( + SELECT or1.analysis_id, + CAST(CONCAT('ERROR: ', cast(or1.analysis_id as VARCHAR(10)), '-', oa1.analysis_name, '; should not have age < 0, (n=', cast(sum(or1.count_value) as VARCHAR(19)), ')') AS VARCHAR(255)) AS ACHILLES_HEEL_warning, + 20 as rule_id, + sum(or1.count_value) as record_count + FROM @resultsDatabaseSchema.ACHILLES_results or1 + INNER JOIN @resultsDatabaseSchema.ACHILLES_analysis oa1 + ON or1.analysis_id = oa1.analysis_id + WHERE or1.analysis_id IN (101) + AND CAST(or1.stratum_1 AS INT) < 0 + AND or1.count_value > 0 + GROUP BY or1.analysis_id, + oa1.analysis_name +) A; diff --git a/inst/sql/sql_server/heels/parallel/heel_results/rule_21.sql b/inst/sql/sql_server/heels/parallel/heel_results/rule_21.sql new file mode 100755 index 00000000..bcb93754 --- /dev/null +++ b/inst/sql/sql_server/heels/parallel/heel_results/rule_21.sql @@ -0,0 +1,24 @@ +--ruleid 21 ERROR: age > 150 (TODO lower number seems more appropriate) + +--HINT DISTRIBUTE_ON_KEY(analysis_id) +select + analysis_id, + ACHILLES_HEEL_warning, + rule_id, + record_count + into @scratchDatabaseSchema@schemaDelim@tempHeelPrefix_@heelName +from +( + SELECT or1.analysis_id, + CAST(CONCAT('ERROR: ', cast(or1.analysis_id as VARCHAR(10)), '-', oa1.analysis_name, '; should not have age > 150, (n=', cast(sum(or1.count_value) as VARCHAR(19)), ')') AS VARCHAR(255)) AS ACHILLES_HEEL_warning, + 21 as rule_id, + sum(or1.count_value) as record_count + FROM @resultsDatabaseSchema.ACHILLES_results or1 + INNER JOIN @resultsDatabaseSchema.ACHILLES_analysis oa1 + ON or1.analysis_id = oa1.analysis_id + WHERE or1.analysis_id IN (101) + AND CAST(or1.stratum_1 AS INT) > 150 + AND or1.count_value > 0 + GROUP BY or1.analysis_id, + oa1.analysis_name +) A; diff --git a/inst/sql/sql_server/heels/parallel/heel_results/rule_22.sql b/inst/sql/sql_server/heels/parallel/heel_results/rule_22.sql new file mode 100755 index 00000000..719d77d3 --- /dev/null +++ b/inst/sql/sql_server/heels/parallel/heel_results/rule_22.sql @@ -0,0 +1,35 @@ +--ruleid 22 WARNING: monthly change > 100% + +--HINT DISTRIBUTE_ON_KEY(analysis_id) +select + analysis_id, + ACHILLES_HEEL_warning, + rule_id, + null as record_count + into @scratchDatabaseSchema@schemaDelim@tempHeelPrefix_@heelName +from +( + SELECT DISTINCT ar1.analysis_id, + CAST(CONCAT('WARNING: ', cast(ar1.analysis_id as VARCHAR(10)), '-', aa1.analysis_name, '; theres a 100% change in monthly count of events') AS VARCHAR(255)) AS ACHILLES_HEEL_warning, + 22 as rule_id + + FROM @resultsDatabaseSchema.ACHILLES_analysis aa1 + INNER JOIN @resultsDatabaseSchema.ACHILLES_results ar1 + ON aa1.analysis_id = ar1.analysis_id + INNER JOIN @resultsDatabaseSchema.ACHILLES_results ar2 + ON ar1.analysis_id = ar2.analysis_id + AND ar1.analysis_id IN ( + 420, + 620, + 720, + 820, + 920, + 1020 + ) + WHERE ( + CAST(ar1.stratum_1 AS INT) + 1 = CAST(ar2.stratum_1 AS INT) + OR CAST(ar1.stratum_1 AS INT) + 89 = CAST(ar2.stratum_1 AS INT) + ) + AND 1.0 * abs(ar2.count_value - ar1.count_value) / ar1.count_value > 1 + AND ar1.count_value > 10 +) A; diff --git a/inst/sql/sql_server/heels/parallel/heel_results/rule_23.sql b/inst/sql/sql_server/heels/parallel/heel_results/rule_23.sql new file mode 100755 index 00000000..873ddcec --- /dev/null +++ b/inst/sql/sql_server/heels/parallel/heel_results/rule_23.sql @@ -0,0 +1,38 @@ +--ruleid 23 WARNING: monthly change > 100% at concept level + +--HINT 
DISTRIBUTE_ON_KEY(analysis_id) +select + analysis_id, + ACHILLES_HEEL_warning, + rule_id, + record_count +into @scratchDatabaseSchema@schemaDelim@tempHeelPrefix_@heelName +from +( + SELECT ar1.analysis_id, + CAST(CONCAT('WARNING: ', cast(ar1.analysis_id as VARCHAR(10)), '-', aa1.analysis_name, '; ', cast(COUNT_BIG(DISTINCT ar1.stratum_1) AS VARCHAR(19)), ' concepts have a 100% change in monthly count of events') AS VARCHAR(255)) AS ACHILLES_HEEL_warning, + 23 as rule_id, + COUNT_BIG(DISTINCT ar1.stratum_1) as record_count + FROM @resultsDatabaseSchema.ACHILLES_analysis aa1 + INNER JOIN @resultsDatabaseSchema.ACHILLES_results ar1 + ON aa1.analysis_id = ar1.analysis_id + INNER JOIN @resultsDatabaseSchema.ACHILLES_results ar2 + ON ar1.analysis_id = ar2.analysis_id + AND ar1.stratum_1 = ar2.stratum_1 + AND ar1.analysis_id IN ( + 402, + 602, + 702, + 802, + 902, + 1002 + ) + WHERE ( + ROUND(CAST(ar1.stratum_2 AS DECIMAL(18,4)),0) + 1 = ROUND(CAST(ar2.stratum_2 AS DECIMAL(18,4)),0) + OR ROUND(CAST(ar1.stratum_2 AS DECIMAL(18,4)),0) + 89 = ROUND(CAST(ar2.stratum_2 AS DECIMAL(18,4)),0) + ) + AND 1.0 * abs(ar2.count_value - ar1.count_value) / ar1.count_value > 1 + AND ar1.count_value > 10 + GROUP BY ar1.analysis_id, + aa1.analysis_name +) A; diff --git a/inst/sql/sql_server/heels/parallel/heel_results/rule_24.sql b/inst/sql/sql_server/heels/parallel/heel_results/rule_24.sql new file mode 100755 index 00000000..6b5fd701 --- /dev/null +++ b/inst/sql/sql_server/heels/parallel/heel_results/rule_24.sql @@ -0,0 +1,22 @@ +--ruleid 24 WARNING: days_supply > 180 + +--HINT DISTRIBUTE_ON_KEY(analysis_id) +select + analysis_id, + ACHILLES_HEEL_warning, + rule_id, + record_count + into @scratchDatabaseSchema@schemaDelim@tempHeelPrefix_@heelName +from +( + SELECT DISTINCT ord1.analysis_id, + CAST(CONCAT('WARNING: ', cast(ord1.analysis_id as VARCHAR(10)), '-', oa1.analysis_name, ' (count = ', cast(COUNT_BIG(ord1.max_value) as VARCHAR(19)), '); max value should not be > 180') AS VARCHAR(255)) AS ACHILLES_HEEL_warning, + 24 as rule_id, + COUNT_BIG(ord1.max_value) as record_count + FROM @resultsDatabaseSchema.ACHILLES_results_dist ord1 + INNER JOIN @resultsDatabaseSchema.ACHILLES_analysis oa1 + ON ord1.analysis_id = oa1.analysis_id + WHERE ord1.analysis_id IN (715) + AND ord1.max_value > 180 + GROUP BY ord1.analysis_id, oa1.analysis_name +) A; diff --git a/inst/sql/sql_server/heels/parallel/heel_results/rule_25.sql b/inst/sql/sql_server/heels/parallel/heel_results/rule_25.sql new file mode 100755 index 00000000..6b771934 --- /dev/null +++ b/inst/sql/sql_server/heels/parallel/heel_results/rule_25.sql @@ -0,0 +1,22 @@ +--ruleid 25 WARNING: refills > 10 + +--HINT DISTRIBUTE_ON_KEY(analysis_id) +select + analysis_id, + ACHILLES_HEEL_warning, + rule_id, + record_count + into @scratchDatabaseSchema@schemaDelim@tempHeelPrefix_@heelName +from +( + SELECT DISTINCT ord1.analysis_id, + CAST(CONCAT('WARNING: ', cast(ord1.analysis_id as VARCHAR(10)), '-', oa1.analysis_name, ' (count = ', cast(COUNT_BIG(ord1.max_value) as VARCHAR(19)), '); max value should not be > 10') AS VARCHAR(255)) AS ACHILLES_HEEL_warning, + 25 as rule_id, + COUNT_BIG(ord1.max_value) as record_count + FROM @resultsDatabaseSchema.ACHILLES_results_dist ord1 + INNER JOIN @resultsDatabaseSchema.ACHILLES_analysis oa1 + ON ord1.analysis_id = oa1.analysis_id + WHERE ord1.analysis_id IN (716) + AND ord1.max_value > 10 + GROUP BY ord1.analysis_id, oa1.analysis_name +) A; diff --git a/inst/sql/sql_server/heels/parallel/heel_results/rule_26.sql 
b/inst/sql/sql_server/heels/parallel/heel_results/rule_26.sql new file mode 100755 index 00000000..e6d2fefb --- /dev/null +++ b/inst/sql/sql_server/heels/parallel/heel_results/rule_26.sql @@ -0,0 +1,22 @@ +--ruleid 26 DQ rule: WARNING: quantity > 600 + +--HINT DISTRIBUTE_ON_KEY(analysis_id) +select + analysis_id, + ACHILLES_HEEL_warning, + rule_id, + record_count + into @scratchDatabaseSchema@schemaDelim@tempHeelPrefix_@heelName +from +( + SELECT DISTINCT ord1.analysis_id, + CAST(CONCAT('WARNING: ', cast(ord1.analysis_id as VARCHAR(10)), '-', oa1.analysis_name, ' (count = ', cast(count(ord1.max_value) as VARCHAR(19)), '); max value should not be > 600') AS VARCHAR(255)) AS ACHILLES_HEEL_warning, + 26 as rule_id, + count(ord1.max_value) as record_count + FROM @resultsDatabaseSchema.ACHILLES_results_dist ord1 + INNER JOIN @resultsDatabaseSchema.ACHILLES_analysis oa1 + ON ord1.analysis_id = oa1.analysis_id + WHERE ord1.analysis_id IN (717) + AND ord1.max_value > 600 + GROUP BY ord1.analysis_id, oa1.analysis_name +) A; diff --git a/inst/sql/sql_server/heels/parallel/heel_results/rule_3.sql b/inst/sql/sql_server/heels/parallel/heel_results/rule_3.sql new file mode 100755 index 00000000..82816b4f --- /dev/null +++ b/inst/sql/sql_server/heels/parallel/heel_results/rule_3.sql @@ -0,0 +1,28 @@ +--ruleid 3 death distributions where max should not be positive + +--HINT DISTRIBUTE_ON_KEY(analysis_id) +select + analysis_id, + ACHILLES_HEEL_warning, + rule_id, + record_count + into @scratchDatabaseSchema@schemaDelim@tempHeelPrefix_@heelName +from +( + SELECT DISTINCT ord1.analysis_id, + CAST(CONCAT('WARNING: ', cast(ord1.analysis_id as VARCHAR(10)), '-', oa1.analysis_name, ' (count = ', cast(COUNT_BIG(ord1.max_value) as VARCHAR(19)), '); max value should not be positive, otherwise its a zombie with data >1mo after death ') AS VARCHAR(255)) AS ACHILLES_HEEL_warning, + 3 as rule_id, + COUNT_BIG(ord1.max_value) as record_count + FROM @resultsDatabaseSchema.ACHILLES_results_dist ord1 + INNER JOIN @resultsDatabaseSchema.ACHILLES_analysis oa1 + ON ord1.analysis_id = oa1.analysis_id + WHERE ord1.analysis_id IN ( + 511, + 512, + 513, + 514, + 515 + ) + AND ord1.max_value > 60 + GROUP BY ord1.analysis_id, oa1.analysis_name +) A; diff --git a/inst/sql/sql_server/heels/parallel/heel_results/rule_4.sql b/inst/sql/sql_server/heels/parallel/heel_results/rule_4.sql new file mode 100755 index 00000000..b1ce7c82 --- /dev/null +++ b/inst/sql/sql_server/heels/parallel/heel_results/rule_4.sql @@ -0,0 +1,42 @@ +--ruleid 4 CDM-conformance rule: invalid concept_id + +--HINT DISTRIBUTE_ON_KEY(analysis_id) +select + analysis_id, + ACHILLES_HEEL_warning, + rule_id, + record_count + into @scratchDatabaseSchema@schemaDelim@tempHeelPrefix_@heelName +from +( + SELECT or1.analysis_id, + CAST(CONCAT('ERROR: ', cast(or1.analysis_id as VARCHAR(10)), '-', oa1.analysis_name, '; ', cast(COUNT_BIG(DISTINCT stratum_1) AS VARCHAR(19)), ' concepts in data are not in vocabulary') AS VARCHAR(255)) AS ACHILLES_HEEL_warning, + 4 as rule_id, + COUNT_BIG(DISTINCT stratum_1) as record_count + FROM @resultsDatabaseSchema.ACHILLES_results or1 + INNER JOIN @resultsDatabaseSchema.ACHILLES_analysis oa1 + ON or1.analysis_id = oa1.analysis_id + LEFT JOIN @cdmDatabaseSchema.concept c1 + ON or1.stratum_1 = CAST(c1.concept_id AS VARCHAR(19)) + WHERE or1.analysis_id IN ( + 2, + 4, + 5, + 200, + 301, + 400, + 500, + 505, + 600, + 700, + 800, + 900, + 1000, + 1609, + 1610 + ) + AND or1.stratum_1 IS NOT NULL + AND c1.concept_id IS NULL + GROUP BY or1.analysis_id, 
+ oa1.analysis_name +) A; diff --git a/inst/sql/sql_server/heels/parallel/heel_results/rule_5.sql b/inst/sql/sql_server/heels/parallel/heel_results/rule_5.sql new file mode 100755 index 00000000..98caba70 --- /dev/null +++ b/inst/sql/sql_server/heels/parallel/heel_results/rule_5.sql @@ -0,0 +1,35 @@ +--ruleid 5 CDM-conformance rule:invalid type concept_id +--this rule is only checking that the concept is valid (joins to concept table at all) +--it does not check the vocabulary_id to further restrict the scope of the valid concepts +--to only include,for example, death types + +--HINT DISTRIBUTE_ON_KEY(analysis_id) +select + analysis_id, + ACHILLES_HEEL_warning, + rule_id, + record_count + into @scratchDatabaseSchema@schemaDelim@tempHeelPrefix_@heelName +from +( + SELECT or1.analysis_id, + CAST(CONCAT('ERROR: ', cast(or1.analysis_id as VARCHAR(10)), '-', oa1.analysis_name, '; ', cast(COUNT_BIG(DISTINCT stratum_2) AS VARCHAR(19)), ' concepts in data are not in vocabulary') AS VARCHAR(255)) AS ACHILLES_HEEL_warning, + 5 as rule_id, + COUNT_BIG(DISTINCT stratum_2) as record_count + FROM @resultsDatabaseSchema.ACHILLES_results or1 + INNER JOIN @resultsDatabaseSchema.ACHILLES_analysis oa1 + ON or1.analysis_id = oa1.analysis_id + LEFT JOIN @cdmDatabaseSchema.concept c1 + ON or1.stratum_2 = CAST(c1.concept_id AS VARCHAR(19)) + WHERE or1.analysis_id IN ( + 405, + 605, + 705, + 805, + 1805 + ) + AND or1.stratum_2 IS NOT NULL + AND c1.concept_id IS NULL + GROUP BY or1.analysis_id, + oa1.analysis_name +) A; diff --git a/inst/sql/sql_server/heels/parallel/heel_results/rule_6.sql b/inst/sql/sql_server/heels/parallel/heel_results/rule_6.sql new file mode 100755 index 00000000..06153f6b --- /dev/null +++ b/inst/sql/sql_server/heels/parallel/heel_results/rule_6.sql @@ -0,0 +1,39 @@ +--ruleid 6 CDM-conformance rule:invalid concept_id + +--HINT DISTRIBUTE_ON_KEY(analysis_id) +select + analysis_id, + ACHILLES_HEEL_warning, + rule_id, + record_count + into @scratchDatabaseSchema@schemaDelim@tempHeelPrefix_@heelName +from +( + SELECT or1.analysis_id, + CAST(CONCAT('WARNING: ', cast(or1.analysis_id as VARCHAR(10)), '-', oa1.analysis_name, '; data with unmapped concepts') AS VARCHAR(255)) AS ACHILLES_HEEL_warning, + 6 as rule_id, + null as record_count + FROM @resultsDatabaseSchema.ACHILLES_results or1 + INNER JOIN @resultsDatabaseSchema.ACHILLES_analysis oa1 + ON or1.analysis_id = oa1.analysis_id + WHERE or1.analysis_id IN ( + 2, + 4, + 5, + 200, + 301, + 400, + 500, + 505, + 600, + 700, + 800, + 900, + 1000, + 1609, + 1610 + ) + AND or1.stratum_1 = '0' + GROUP BY or1.analysis_id, + oa1.analysis_name +) A; diff --git a/inst/sql/sql_server/heels/parallel/heel_results/rule_7.sql b/inst/sql/sql_server/heels/parallel/heel_results/rule_7.sql new file mode 100755 index 00000000..32a65194 --- /dev/null +++ b/inst/sql/sql_server/heels/parallel/heel_results/rule_7.sql @@ -0,0 +1,28 @@ +--concept from the wrong vocabulary +--ruleid 7 CDM-conformance rule:gender - 12 HL7 + +--HINT DISTRIBUTE_ON_KEY(analysis_id) +select + analysis_id, + ACHILLES_HEEL_warning, + rule_id, + record_count + into @scratchDatabaseSchema@schemaDelim@tempHeelPrefix_@heelName +from +( + SELECT or1.analysis_id, + CAST(CONCAT('ERROR: ', cast(or1.analysis_id as VARCHAR(10)), '-', oa1.analysis_name, '; ', cast(COUNT_BIG(DISTINCT stratum_1) AS VARCHAR(19)), ' concepts in data are not in correct vocabulary') AS VARCHAR(255)) AS ACHILLES_HEEL_warning, + 7 as rule_id, + COUNT_BIG(DISTINCT stratum_1) as record_count + FROM 
@resultsDatabaseSchema.ACHILLES_results or1 + INNER JOIN @resultsDatabaseSchema.ACHILLES_analysis oa1 + ON or1.analysis_id = oa1.analysis_id + INNER JOIN @cdmDatabaseSchema.concept c1 + ON or1.stratum_1 = CAST(c1.concept_id AS VARCHAR(19)) + WHERE or1.analysis_id IN (2) + AND or1.stratum_1 IS NOT NULL + AND c1.concept_id <> 0 + AND lower(c1.domain_id) NOT IN ('gender') + GROUP BY or1.analysis_id, + oa1.analysis_name +) A; diff --git a/inst/sql/sql_server/heels/parallel/heel_results/rule_8.sql b/inst/sql/sql_server/heels/parallel/heel_results/rule_8.sql new file mode 100755 index 00000000..824b5d09 --- /dev/null +++ b/inst/sql/sql_server/heels/parallel/heel_results/rule_8.sql @@ -0,0 +1,27 @@ +--ruleid 8 race - 13 CDC Race + +--HINT DISTRIBUTE_ON_KEY(analysis_id) +select + analysis_id, + ACHILLES_HEEL_warning, + rule_id, + record_count + into @scratchDatabaseSchema@schemaDelim@tempHeelPrefix_@heelName +from +( + SELECT or1.analysis_id, + CAST(CONCAT('ERROR: ', cast(or1.analysis_id as VARCHAR(10)), '-', oa1.analysis_name, '; ', cast(COUNT_BIG(DISTINCT stratum_1) AS VARCHAR(19)), ' concepts in data are not in correct vocabulary') AS VARCHAR(255)) AS ACHILLES_HEEL_warning, + 8 as rule_id, + COUNT_BIG(DISTINCT stratum_1) as record_count + FROM @resultsDatabaseSchema.ACHILLES_results or1 + INNER JOIN @resultsDatabaseSchema.ACHILLES_analysis oa1 + ON or1.analysis_id = oa1.analysis_id + INNER JOIN @cdmDatabaseSchema.concept c1 + ON or1.stratum_1 = CAST(c1.concept_id AS VARCHAR(19)) + WHERE or1.analysis_id IN (4) + AND or1.stratum_1 IS NOT NULL + AND c1.concept_id <> 0 + AND lower(c1.domain_id) NOT IN ('race') + GROUP BY or1.analysis_id, + oa1.analysis_name +) A; diff --git a/inst/sql/sql_server/heels/parallel/heel_results/rule_9.sql b/inst/sql/sql_server/heels/parallel/heel_results/rule_9.sql new file mode 100755 index 00000000..4271cc61 --- /dev/null +++ b/inst/sql/sql_server/heels/parallel/heel_results/rule_9.sql @@ -0,0 +1,27 @@ +--ruleid 9 ethnicity - 44 ethnicity + +--HINT DISTRIBUTE_ON_KEY(analysis_id) +select + analysis_id, + ACHILLES_HEEL_warning, + rule_id, + record_count + into @scratchDatabaseSchema@schemaDelim@tempHeelPrefix_@heelName +from +( + SELECT or1.analysis_id, + CAST(CONCAT('ERROR: ', cast(or1.analysis_id as VARCHAR(10)), '-', oa1.analysis_name, '; ', cast(COUNT_BIG(DISTINCT stratum_1) AS VARCHAR(19)), ' concepts in data are not in correct vocabulary (CMS Ethnicity)') AS VARCHAR(255)) AS ACHILLES_HEEL_warning, + 9 as rule_id, + COUNT_BIG(DISTINCT stratum_1) as record_count + FROM @resultsDatabaseSchema.ACHILLES_results or1 + INNER JOIN @resultsDatabaseSchema.ACHILLES_analysis oa1 + ON or1.analysis_id = oa1.analysis_id + INNER JOIN @cdmDatabaseSchema.concept c1 + ON or1.stratum_1 = CAST(c1.concept_id AS VARCHAR(19)) + WHERE or1.analysis_id IN (5) + AND or1.stratum_1 IS NOT NULL + AND c1.concept_id <> 0 + AND lower(c1.domain_id) NOT IN ('ethnicity') + GROUP BY or1.analysis_id, + oa1.analysis_name +) A; diff --git a/inst/sql/sql_server/heels/parallel/results_derived/1.sql b/inst/sql/sql_server/heels/parallel/results_derived/1.sql new file mode 100755 index 00000000..f9755ca9 --- /dev/null +++ b/inst/sql/sql_server/heels/parallel/results_derived/1.sql @@ -0,0 +1,9 @@ +select + null as analysis_id, + stratum_1, + null as stratum_2, + count(*) as statistic_value, + cast('UnmappedDataByDomain:SourceValueCnt' as varchar(255)) as measure_id +into @scratchDatabaseSchema@schemaDelim@tempHeelPrefix_@heelName +from @resultsDatabaseSchema.ACHILLES_results where analysis_id = 1900 +group by 
stratum_1; diff --git a/inst/sql/sql_server/heels/parallel/results_derived/13.sql b/inst/sql/sql_server/heels/parallel/results_derived/13.sql new file mode 100755 index 00000000..0e159c39 --- /dev/null +++ b/inst/sql/sql_server/heels/parallel/results_derived/13.sql @@ -0,0 +1,10 @@ +select + null as analysis_id, + cast(analysis_id as varchar(255)) as stratum_1, + null as stratum_2, + COUNT_BIG(*) as statistic_value, + cast('Achilles:byAnalysis:RowCnt' as varchar(255)) as measure_id +into @scratchDatabaseSchema@schemaDelim@tempHeelPrefix_@heelName +from @resultsDatabaseSchema.ACHILLES_results +group by analysis_id +; diff --git a/inst/sql/sql_server/heels/parallel/results_derived/14.sql b/inst/sql/sql_server/heels/parallel/results_derived/14.sql new file mode 100755 index 00000000..6e6126ac --- /dev/null +++ b/inst/sql/sql_server/heels/parallel/results_derived/14.sql @@ -0,0 +1,26 @@ +{DEFAULT @derivedDataSmPtCount = 11} + +select + null as analysis_id, + a.stratum_1, + a.stratum_4 as stratum_2, + cast(1.0*a.person_cnt/b.population_size as FLOAT) as statistic_value, +cast('Visit:Type:PersonWithAtLeastOne:byDecile:Percentage' as varchar(255)) as measure_id +into @scratchDatabaseSchema@schemaDelim@tempHeelPrefix_@heelName +from +(select + stratum_1, + stratum_4, + sum(count_value) as person_cnt + from @resultsDatabaseSchema.ACHILLES_results + where analysis_id = 204 + group by stratum_1, stratum_4) a +inner join +(select + stratum_4, + sum(count_value) as population_size + from @resultsDatabaseSchema.ACHILLES_results + where analysis_id = 204 + group by stratum_4) b +on a.stratum_4=b.stratum_4 +where a.person_cnt >= @derivedDataSmPtCount; diff --git a/inst/sql/sql_server/heels/parallel/results_derived/15.sql b/inst/sql/sql_server/heels/parallel/results_derived/15.sql new file mode 100755 index 00000000..9e77bcaa --- /dev/null +++ b/inst/sql/sql_server/heels/parallel/results_derived/15.sql @@ -0,0 +1,8 @@ +select + null as analysis_id, + null as stratum_1, + null as stratum_2, + count(*) as statistic_value, + cast('Device:ConceptCnt' as varchar(255)) as measure_id +into @scratchDatabaseSchema@schemaDelim@tempHeelPrefix_@heelName +from @resultsDatabaseSchema.ACHILLES_results where analysis_id = 2101; diff --git a/inst/sql/sql_server/heels/parallel/results_derived/16.sql b/inst/sql/sql_server/heels/parallel/results_derived/16.sql new file mode 100755 index 00000000..8233febf --- /dev/null +++ b/inst/sql/sql_server/heels/parallel/results_derived/16.sql @@ -0,0 +1,8 @@ +select + null as analysis_id, + null as stratum_1, + null as stratum_2, + count(*) as statistic_value, + cast('Measurement:ConceptCnt' as varchar(255)) as measure_id +into @scratchDatabaseSchema@schemaDelim@tempHeelPrefix_@heelName +from @resultsDatabaseSchema.ACHILLES_results where analysis_id = 1801; diff --git a/inst/sql/sql_server/heels/parallel/results_derived/17.sql b/inst/sql/sql_server/heels/parallel/results_derived/17.sql new file mode 100755 index 00000000..c48f33d7 --- /dev/null +++ b/inst/sql/sql_server/heels/parallel/results_derived/17.sql @@ -0,0 +1,8 @@ +select + null as analysis_id, + null as stratum_1, + null as stratum_2, + count(*) as statistic_value, + cast('Observation:ConceptCnt' as varchar(255)) as measure_id +into @scratchDatabaseSchema@schemaDelim@tempHeelPrefix_@heelName +from @resultsDatabaseSchema.ACHILLES_results where analysis_id = 801; diff --git a/inst/sql/sql_server/heels/parallel/results_derived/18.sql b/inst/sql/sql_server/heels/parallel/results_derived/18.sql new file mode 100755 index 
00000000..6f94859c --- /dev/null +++ b/inst/sql/sql_server/heels/parallel/results_derived/18.sql @@ -0,0 +1,8 @@ +select + null as analysis_id, + null as stratum_1, + null as stratum_2, + count(*) as statistic_value, + cast('Procedure:ConceptCnt' as varchar(255)) as measure_id +into @scratchDatabaseSchema@schemaDelim@tempHeelPrefix_@heelName +from @resultsDatabaseSchema.ACHILLES_results where analysis_id = 601; diff --git a/inst/sql/sql_server/heels/parallel/results_derived/19.sql b/inst/sql/sql_server/heels/parallel/results_derived/19.sql new file mode 100755 index 00000000..fc4d280a --- /dev/null +++ b/inst/sql/sql_server/heels/parallel/results_derived/19.sql @@ -0,0 +1,8 @@ +select + null as analysis_id, + null as stratum_1, + null as stratum_2, + count(*) as statistic_value, + cast('Note:ConceptCnt' as varchar(255)) as measure_id +into @scratchDatabaseSchema@schemaDelim@tempHeelPrefix_@heelName +from @resultsDatabaseSchema.ACHILLES_results where analysis_id = 2201; diff --git a/inst/sql/sql_server/heels/parallel/results_derived/2.sql b/inst/sql/sql_server/heels/parallel/results_derived/2.sql new file mode 100755 index 00000000..069931e0 --- /dev/null +++ b/inst/sql/sql_server/heels/parallel/results_derived/2.sql @@ -0,0 +1,43 @@ +--age at first observation by decile +select analysis_id, stratum_1, stratum_2, sum(count_value) as statistic_value, measure_id +into #temp_0 +from ( + select + null as analysis_id, + cast(floor(cast(stratum_1 as int)/10) as varchar(255)) as stratum_1, + null as stratum_2, + count_value, + cast('AgeAtFirstObsByDecile:PersonCnt' as varchar(255)) as measure_id + from @resultsDatabaseSchema.ACHILLES_results where analysis_id = 101 +) Q +group by analysis_id, stratum_1, stratum_2, measure_id +; + +--count whether all deciles from 0 to 8 are there (has later a rule: if less the threshold, issue notification) +select + null as analysis_id, + null as stratum_1, + null as stratum_2, + count(*) as statistic_value, + cast('AgeAtFirstObsByDecile:DecileCnt' as varchar(255)) as measure_id +into #temp_1 +from #temp_0 +where measure_id = 'AgeAtFirstObsByDecile:PersonCnt' +and cast(stratum_1 as int) <=8; + +select analysis_id, stratum_1, stratum_2, statistic_value, measure_id +into @scratchDatabaseSchema@schemaDelim@tempHeelPrefix_@heelName +from +( + select analysis_id, stratum_1, stratum_2, statistic_value, measure_id + from #temp_0 + union all + select analysis_id, stratum_1, stratum_2, statistic_value, measure_id + from #temp_1 +) A; + +truncate table #temp_0; +drop table #temp_0; + +truncate table #temp_1; +drop table #temp_1; diff --git a/inst/sql/sql_server/heels/parallel/results_derived/20.sql b/inst/sql/sql_server/heels/parallel/results_derived/20.sql new file mode 100755 index 00000000..807a6aff --- /dev/null +++ b/inst/sql/sql_server/heels/parallel/results_derived/20.sql @@ -0,0 +1,8 @@ +select + null as analysis_id, + null as stratum_1, + null as stratum_2, + count(*) as statistic_value, + cast('Death:DeathCause:ConceptCnt' as varchar(255)) as measure_id +into @scratchDatabaseSchema@schemaDelim@tempHeelPrefix_@heelName +from @resultsDatabaseSchema.ACHILLES_results where analysis_id = 501; diff --git a/inst/sql/sql_server/heels/parallel/results_derived/21.sql b/inst/sql/sql_server/heels/parallel/results_derived/21.sql new file mode 100755 index 00000000..73f1b2cc --- /dev/null +++ b/inst/sql/sql_server/heels/parallel/results_derived/21.sql @@ -0,0 +1,8 @@ +select + null as analysis_id, + null as stratum_1, + null as stratum_2, + count(*) as statistic_value, + 
cast('Death:DeathType:ConceptCnt' as varchar(255)) as measure_id +into @scratchDatabaseSchema@schemaDelim@tempHeelPrefix_@heelName +from @resultsDatabaseSchema.ACHILLES_results where analysis_id = 505; diff --git a/inst/sql/sql_server/heels/parallel/results_derived/22.sql b/inst/sql/sql_server/heels/parallel/results_derived/22.sql new file mode 100755 index 00000000..b72356d3 --- /dev/null +++ b/inst/sql/sql_server/heels/parallel/results_derived/22.sql @@ -0,0 +1,15 @@ +{DEFAULT @derivedDataSmPtCount = 11} + +select + null as analysis_id, + stratum_1, + null as stratum_2, + temp_cnt as statistic_value, +cast('Death:byYear:SafePatientCnt' as varchar(255)) as measure_id +into @scratchDatabaseSchema@schemaDelim@tempHeelPrefix_@heelName +from + (select stratum_1,sum(count_value) as temp_cnt + from @resultsDatabaseSchema.ACHILLES_results where analysis_id = 504 + group by stratum_1 + ) a +where temp_cnt >= @derivedDataSmPtCount; diff --git a/inst/sql/sql_server/heels/parallel/results_derived/23.sql b/inst/sql/sql_server/heels/parallel/results_derived/23.sql new file mode 100755 index 00000000..25fcb425 --- /dev/null +++ b/inst/sql/sql_server/heels/parallel/results_derived/23.sql @@ -0,0 +1,16 @@ +{DEFAULT @derivedDataSmPtCount = 11} + + +select + null as analysis_id, + cast(decade as varchar(255)) as stratum_1, + null as stratum_2, + temp_cnt as statistic_value, +cast('Death:byDecade:SafePatientCnt' as varchar(255)) as measure_id +into @scratchDatabaseSchema@schemaDelim@tempHeelPrefix_@heelName +from + (select left(stratum_1,3) as decade,sum(count_value) as temp_cnt + from @resultsDatabaseSchema.ACHILLES_results where analysis_id = 504 + group by left(stratum_1,3) + ) a +where temp_cnt >= @derivedDataSmPtCount; diff --git a/inst/sql/sql_server/heels/parallel/results_derived/24.sql b/inst/sql/sql_server/heels/parallel/results_derived/24.sql new file mode 100755 index 00000000..df07132d --- /dev/null +++ b/inst/sql/sql_server/heels/parallel/results_derived/24.sql @@ -0,0 +1,21 @@ +select + null as analysis_id, + a.stratum_1, + null as stratum_2, + cast(1.0*a.born_cnt/b.died_cnt as FLOAT) as statistic_value, + cast('Death:BornDeceasedRatio' as varchar(255)) as measure_id +into @scratchDatabaseSchema@schemaDelim@tempHeelPrefix_@heelName +from +(select + stratum_1, + count_value as born_cnt + from @resultsDatabaseSchema.ACHILLES_results + where analysis_id = 3) a +inner join +(select + stratum_1, + count(count_value) as died_cnt + from @resultsDatabaseSchema.ACHILLES_results + where analysis_id = 504 group by stratum_1) b +on a.stratum_1 = b.stratum_1 +where b.died_cnt > 0; diff --git a/inst/sql/sql_server/heels/parallel/results_derived/25.sql b/inst/sql/sql_server/heels/parallel/results_derived/25.sql new file mode 100755 index 00000000..60ab16a5 --- /dev/null +++ b/inst/sql/sql_server/heels/parallel/results_derived/25.sql @@ -0,0 +1,29 @@ +--general derived measures +--non-CDM sources may generate derived measures directly +--for CDM and Achilles: the fastest way to compute derived measures is to use +--existing measures +--derived measures have IDs over 100 000 (not any more, instead, they use measure_id as their id) + + +--event type derived measures analysis xx05 is often analysis by xx_type +--generate counts for meas type, drug type, proc type, obs type +--optional TODO: possibly rewrite this with CASE statement to better make 705 into drug, 605 into proc ...etc +-- in measure_id column (or make that separate sql calls for each category) + + +select analysis_id, stratum_1, stratum_2, 
sum(count_value) as statistic_value, measure_id +into @scratchDatabaseSchema@schemaDelim@tempHeelPrefix_@heelName +from +( + --100000+analysis_id, + select + NULL as analysis_id, + stratum_2 as stratum_1, + null as stratum_2, + count_value, + CAST(concat('ach_',CAST(analysis_id as VARCHAR), ':GlobalCnt') as varchar(255)) as measure_id + from @resultsDatabaseSchema.ACHILLES_results + where analysis_id in (1805,705,605,805,405) +) Q +group by analysis_id, stratum_1, stratum_2, measure_id +; diff --git a/inst/sql/sql_server/heels/parallel/results_derived/26.sql b/inst/sql/sql_server/heels/parallel/results_derived/26.sql new file mode 100755 index 00000000..f54f9482 --- /dev/null +++ b/inst/sql/sql_server/heels/parallel/results_derived/26.sql @@ -0,0 +1,15 @@ +--total number of rows per domain +--this derived measure is used for a later measure of % of unmapped rows +--this produces a total count of rows in the condition table, procedure table, etc. +--used as denominator in later measures + + +select + null as analysis_id, + null as stratum_1, + null as stratum_2, + sum(count_value) as statistic_value, + CAST(concat('ach_',CAST(analysis_id as VARCHAR),':GlobalRowCnt') as varchar(255)) as measure_id +into @scratchDatabaseSchema@schemaDelim@tempHeelPrefix_@heelName +from @resultsDatabaseSchema.ACHILLES_results +where analysis_id in (401,601,701,801,1801) group by analysis_id; diff --git a/inst/sql/sql_server/heels/parallel/results_derived/27.sql b/inst/sql/sql_server/heels/parallel/results_derived/27.sql new file mode 100755 index 00000000..b75880f9 --- /dev/null +++ b/inst/sql/sql_server/heels/parallel/results_derived/27.sql @@ -0,0 +1,14 @@ +--iris measures by percentage +--for this part, the derived table is trying to adopt DQI terminology +--and generalize the analysis naming scheme (and generalize the DQ rules) + +select + null as analysis_id, + null as stratum_1, + null as stratum_2, + CAST(100.0*count_value/statistic.total_pts AS FLOAT) as statistic_value, + CAST(CONCAT('ach_',CAST(analysis_id as VARCHAR(10)),':Percentage') AS VARCHAR(100)) as measure_id +into @scratchDatabaseSchema@schemaDelim@tempHeelPrefix_@heelName +from @resultsDatabaseSchema.ACHILLES_results +cross join (SELECT TOP 1 count_value as total_pts from @resultsDatabaseSchema.ACHILLES_results r where analysis_id =1) as statistic +where analysis_id in (2000,2001,2002,2003); diff --git a/inst/sql/sql_server/heels/parallel/results_derived/28.sql b/inst/sql/sql_server/heels/parallel/results_derived/28.sql new file mode 100755 index 00000000..7aab297c --- /dev/null +++ b/inst/sql/sql_server/heels/parallel/results_derived/28.sql @@ -0,0 +1,13 @@ +--concept_0 global row counts per domain +--this is the numerator for the percentage of unmapped rows (per domain) +select + null as analysis_id, + null as stratum_1, + null as stratum_2, + count_value as statistic_value, + CAST(concat('UnmappedData:ach_',cast(analysis_id as VARCHAR),':GlobalRowCnt') as varchar(255)) as measure_id +into @scratchDatabaseSchema@schemaDelim@tempHeelPrefix_@heelName +from @resultsDatabaseSchema.ACHILLES_results +--TODO: stratum_1 is varchar and this comparison may fail on some db engines +--indeed, mysql got an error, so this was changed to a string comparison +where analysis_id in (401,601,701,801,1801) and stratum_1 = '0'; diff --git a/inst/sql/sql_server/heels/parallel/results_derived/29.sql b/inst/sql/sql_server/heels/parallel/results_derived/29.sql new file mode 100755 index 00000000..b205ea52 --- /dev/null +++ b/inst/sql/sql_server/heels/parallel/results_derived/29.sql @@ -0,0 +1,8 
@@ +select + null as analysis_id, + null as stratum_1, + null as stratum_2, + sum(count_value) as statistic_value, + cast('Visit:InstanceCnt' as varchar(255)) as measure_id +into @scratchDatabaseSchema@schemaDelim@tempHeelPrefix_@heelName +from @resultsDatabaseSchema.ACHILLES_results where analysis_id = 201; diff --git a/inst/sql/sql_server/heels/parallel/results_derived/30.sql b/inst/sql/sql_server/heels/parallel/results_derived/30.sql new file mode 100755 index 00000000..5298ce00 --- /dev/null +++ b/inst/sql/sql_server/heels/parallel/results_derived/30.sql @@ -0,0 +1,8 @@ +select + null as analysis_id, + null as stratum_1, + null as stratum_2, + count(*) as statistic_value, + cast('Condition:ConceptCnt' as varchar(255)) as measure_id +into @scratchDatabaseSchema@schemaDelim@tempHeelPrefix_@heelName +from @resultsDatabaseSchema.ACHILLES_results where analysis_id = 401; diff --git a/inst/sql/sql_server/heels/parallel/results_derived/31.sql b/inst/sql/sql_server/heels/parallel/results_derived/31.sql new file mode 100755 index 00000000..e8be6ac8 --- /dev/null +++ b/inst/sql/sql_server/heels/parallel/results_derived/31.sql @@ -0,0 +1,8 @@ +select + null as analysis_id, + null as stratum_1, + null as stratum_2, + count(*) as statistic_value, + cast('Visit:ConceptCnt' as varchar(255)) as measure_id +into @scratchDatabaseSchema@schemaDelim@tempHeelPrefix_@heelName +from @resultsDatabaseSchema.ACHILLES_results where analysis_id = 201; diff --git a/inst/sql/sql_server/heels/parallel/results_derived/32.sql b/inst/sql/sql_server/heels/parallel/results_derived/32.sql new file mode 100755 index 00000000..7e15487f --- /dev/null +++ b/inst/sql/sql_server/heels/parallel/results_derived/32.sql @@ -0,0 +1,8 @@ +select + null as analysis_id, + null as stratum_1, + null as stratum_2, + count(*) as statistic_value, + cast('Person:Ethnicity:ConceptCnt' as varchar(255)) as measure_id +into @scratchDatabaseSchema@schemaDelim@tempHeelPrefix_@heelName +from @resultsDatabaseSchema.ACHILLES_results where analysis_id = 5; diff --git a/inst/sql/sql_server/heels/parallel/results_derived/33.sql b/inst/sql/sql_server/heels/parallel/results_derived/33.sql new file mode 100755 index 00000000..30db84d8 --- /dev/null +++ b/inst/sql/sql_server/heels/parallel/results_derived/33.sql @@ -0,0 +1,8 @@ +select + null as analysis_id, + null as stratum_1, + null as stratum_2, + count(*) as statistic_value, + cast('Person:Race:ConceptCnt' as varchar(255)) as measure_id +into @scratchDatabaseSchema@schemaDelim@tempHeelPrefix_@heelName +from @resultsDatabaseSchema.ACHILLES_results where analysis_id = 4; diff --git a/inst/sql/sql_server/heels/parallel/results_derived/6.sql b/inst/sql/sql_server/heels/parallel/results_derived/6.sql new file mode 100755 index 00000000..2f0ce1b7 --- /dev/null +++ b/inst/sql/sql_server/heels/parallel/results_derived/6.sql @@ -0,0 +1,8 @@ +select + null as analysis_id, + null as stratum_1, + null as stratum_2, + count(*) as statistic_value, + cast('Provider:SpecialtyCnt' as varchar(255)) as measure_id +into @scratchDatabaseSchema@schemaDelim@tempHeelPrefix_@heelName +from @resultsDatabaseSchema.ACHILLES_results where analysis_id = 301; diff --git a/inst/sql/sql_server/heels/parallel/results_derived/7.sql b/inst/sql/sql_server/heels/parallel/results_derived/7.sql new file mode 100755 index 00000000..0dba2ced --- /dev/null +++ b/inst/sql/sql_server/heels/parallel/results_derived/7.sql @@ -0,0 +1,9 @@ + +select + null as analysis_id, + null as stratum_1, + null as stratum_2, + count(*) as statistic_value, + 
cast('DrugExposure:ConceptCnt' as varchar(255)) as measure_id +into @scratchDatabaseSchema@schemaDelim@tempHeelPrefix_@heelName +from @resultsDatabaseSchema.ACHILLES_results where analysis_id = 701; diff --git a/inst/sql/sql_server/heels/parallel/results_derived/8.sql b/inst/sql/sql_server/heels/parallel/results_derived/8.sql new file mode 100755 index 00000000..e5620b09 --- /dev/null +++ b/inst/sql/sql_server/heels/parallel/results_derived/8.sql @@ -0,0 +1,8 @@ +select + null as analysis_id, + null as stratum_1, + null as stratum_2, + count(*) as statistic_value, + cast('DrugEra:ConceptCnt' as varchar(255)) as measure_id +into @scratchDatabaseSchema@schemaDelim@tempHeelPrefix_@heelName +from @resultsDatabaseSchema.ACHILLES_results where analysis_id = 901; diff --git a/inst/sql/sql_server/heels/serial/rule_27.sql b/inst/sql/sql_server/heels/serial/rule_27.sql new file mode 100644 index 00000000..d88a7689 --- /dev/null +++ b/inst/sql/sql_server/heels/serial/rule_27.sql @@ -0,0 +1,168 @@ +select * +into #rule27_1 +from +( + + select * from #achilles_rd_0 + + union all + + select + null as analysis_id, + CAST('Condition' AS VARCHAR(255)) as stratum_1, + null as stratum_2, + CAST(100.0*st.val/statistic_value AS FLOAT) as statistic_value, + CAST('UnmappedData:byDomain:Percentage' AS VARCHAR(255)) as measure_id + from #achilles_rd_0 + cross join (select statistic_value as val from #achilles_rd_0 + where measure_id like 'UnmappedData:ach_401:GlobalRowCnt') as st + where measure_id = 'ach_401:GlobalRowCnt' +) Q +; + + +select * +into #rule27_2 +from +( + + select * from #rule27_1 + + union all + + select + null as analysis_id, + CAST('Procedure' AS VARCHAR(255)) as stratum_1, + null as stratum_2, + CAST(100.0*st.val/statistic_value AS FLOAT) as statistic_value, + CAST( 'UnmappedData:byDomain:Percentage' AS VARCHAR(255)) as measure_id + from #rule27_1 A + cross join (select statistic_value as val from #rule27_1 + where measure_id = 'UnmappedData:ach_601:GlobalRowCnt') as st + where measure_id ='ach_601:GlobalRowCnt' + +) Q +; + + +select * +into #rule27_3 +from +( + + select * from #rule27_2 + + union all + + select + null as analysis_id, + CAST('DrugExposure' AS VARCHAR(255)) as stratum_1, + null as stratum_2, + CAST(100.0*st.val/statistic_value AS FLOAT) as statistic_value, + CAST( 'UnmappedData:byDomain:Percentage' AS VARCHAR(255)) as measure_id + from #rule27_2 A + cross join (select statistic_value as val from #rule27_2 + where measure_id = 'UnmappedData:ach_701:GlobalRowCnt') as st + where measure_id ='ach_701:GlobalRowCnt' + +) Q +; + + +select * +into #rule27_4 +from +( + + select * from #rule27_3 + + union all + + select + null as analysis_id, + CAST('Observation' AS VARCHAR(255)) as stratum_1, + null as stratum_2, + CAST(100.0*st.val/statistic_value AS FLOAT) as statistic_value, + CAST( 'UnmappedData:byDomain:Percentage' AS VARCHAR(255)) as measure_id + from #rule27_3 A + cross join (select statistic_value as val from #rule27_3 + where measure_id = 'UnmappedData:ach_801:GlobalRowCnt') as st + where measure_id ='ach_801:GlobalRowCnt' + +) Q +; + + +select * +into #rule27_5 +from +( + + select * from #rule27_4 + + union all + + select + null as analysis_id, + CAST('Measurement' AS VARCHAR(255)) as stratum_1, + null as stratum_2, + CAST(100.0*st.val/statistic_value AS FLOAT) as statistic_value, + CAST( 'UnmappedData:byDomain:Percentage' AS VARCHAR(255)) as measure_id + from #rule27_4 A + cross join (select statistic_value as val from #rule27_4 + where measure_id = 
'UnmappedData:ach_1801:GlobalRowCnt') as st + where measure_id ='ach_1801:GlobalRowCnt' + +) Q +; + + +select * +into #serial_rd_@rdNewId +from +( + select * from #rule27_5 +) Q; + +truncate table #rule27_1; +drop table #rule27_1; + +truncate table #rule27_2; +drop table #rule27_2; + +truncate table #rule27_3; +drop table #rule27_3; + +truncate table #rule27_4; +drop table #rule27_4; + +truncate table #rule27_5; +drop table #rule27_5; + + +--actual rule27 + +select * +into #serial_hr_@hrNewId +from +( + select * from #achilles_hr_0 + + union all + + SELECT + null as analysis_id, + CAST(CONCAT('NOTIFICATION:Unmapped data over percentage threshold in:', + cast(d.stratum_1 as varchar(100))) AS VARCHAR(255)) as ACHILLES_HEEL_warning, + 27 as rule_id, + null as record_count + FROM #serial_rd_@rdNewId d + where d.measure_id = 'UnmappedData:byDomain:Percentage' + and d.statistic_value > 0.1 --thresholds will be decided in the ongoing DQ-Study2 +) Q +; + +--end of rule27 + +drop table #achilles_hr_0; +drop table #achilles_rd_0; diff --git a/inst/sql/sql_server/heels/serial/rule_28.sql b/inst/sql/sql_server/heels/serial/rule_28.sql new file mode 100644 index 00000000..6fe27772 --- /dev/null +++ b/inst/sql/sql_server/heels/serial/rule_28.sql @@ -0,0 +1,64 @@ +--rule28 DQ rule +--are all values (or more than a threshold) in the measurement table non-numerical? +--(the count of Measurement records with no numerical value is in analysis_id 1821) + + +select * into #tempResults +from +( + select + (select count_value from @resultsDatabaseSchema.achilles_results where analysis_id = 1821)*100.0/all_count as statistic_value, + CAST('Meas:NoNumValue:Percentage' AS VARCHAR(100)) as measure_id + from + ( + select sum(count_value) as all_count from @resultsDatabaseSchema.achilles_results where analysis_id = 1820 + ) t1 +) t2 +; + +select * into #serial_rd_@rdNewId +from +( + select * from #serial_rd_@rdOldId + + union all + + select + null as analysis_id, + null as stratum_1, + null as stratum_2, + statistic_value, + measure_id + from #tempResults +) Q +; + + + +SELECT * +into #serial_hr_@hrNewId +FROM +( + select * from #serial_hr_@hrOldId + + union all + + select + null as analysis_id, + CAST('NOTIFICATION: percentage of non-numerical measurement records exceeds general population threshold ' AS VARCHAR(255)) as ACHILLES_HEEL_warning, + 28 as rule_id, + cast(statistic_value as int) as record_count + from #tempResults t + --WHERE t.analysis_id IN (100730,100430) --umbrella version + WHERE measure_id = 'Meas:NoNumValue:Percentage' --t.analysis_id IN (100000) + --the intended threshold is 1 percent, this value is there to get pilot data from early adopters + AND t.statistic_value >= 80 +) Q +; + + +--clean up temp tables for rule 28 +truncate table #tempResults; +drop table #tempResults; + +--end of rule 28 diff --git a/inst/sql/sql_server/heels/serial/rule_29.sql b/inst/sql/sql_server/heels/serial/rule_29.sql new file mode 100644 index 00000000..12c805de --- /dev/null +++ b/inst/sql/sql_server/heels/serial/rule_29.sql @@ -0,0 +1,38 @@ +--rule29 DQ rule +--unusual diagnosis present; this rule is terminology dependent + +with tempcnt as( + select sum(count_value) as pt_cnt from @resultsDatabaseSchema.ACHILLES_results + where analysis_id = 404 --dx by decile + and stratum_1 = '195075' --meconium + --and stratum_3 = '8507' --possible limit to males only + and cast(stratum_4 as int) >= 5 --fifth decile or more +) +select pt_cnt as record_count +into #tempResults +--set threshold here, currently it is zero +from tempcnt where 
pt_cnt > 0; + + +--using a temp table because a with clause occurring prior to the insert into is causing problems +--and the with clause makes the code more readable + +SELECT * +into #serial_hr_@hrNewId +FROM +( + select * from #serial_hr_@hrOldId + + union all + + select + null as analysis_id, + CAST('WARNING:[PLAUSIBILITY] infant-age diagnosis (195075) at age 50+' AS VARCHAR(255)) as ACHILLES_HEEL_warning, + 29 as rule_id, + null as record_count + from #tempResults +) Q; + +truncate table #tempResults; +drop table #tempResults; +--end of rule29 diff --git a/inst/sql/sql_server/heels/serial/rule_31.sql b/inst/sql/sql_server/heels/serial/rule_31.sql new file mode 100644 index 00000000..0eb7f7b9 --- /dev/null +++ b/inst/sql/sql_server/heels/serial/rule_31.sql @@ -0,0 +1,44 @@ +--rule31 DQ rule +--ratio of providers to total patients + +--compute a derived ratio +--TODO if provider count is zero it will generate division by zero (not sure how different db engines will react) +select * into #serial_rd_@rdNewId +from +( + select * from #serial_rd_@rdOldId + + union all + + select + null as analysis_id, + null as stratum_1, + null as stratum_2, + CAST(1.0*ct.total_pts/count_value AS FLOAT) as statistic_value, + CAST('Provider:PatientProviderRatio' AS VARCHAR(255)) as measure_id + from @resultsDatabaseSchema.achilles_results + cross join (select count_value as total_pts from @resultsDatabaseSchema.achilles_results r where analysis_id =1) ct + where analysis_id = 300 +) Q +; + +--actual rule + +select * +into #serial_hr_@hrNewId +from +( + select * from #serial_hr_@hrOldId + + union all + + select + null as analysis_id, + CAST('NOTIFICATION:[PLAUSIBILITY] database has too few providers defined (given the total patient number)' AS VARCHAR(255)) as achilles_heel_warning, + 31 as rule_id, + null as record_count + from #serial_rd_@rdNewId d + where d.measure_id = 'Provider:PatientProviderRatio' + and d.statistic_value > 10000 --thresholds will be decided in the ongoing DQ-Study2 +) Q +; diff --git a/inst/sql/sql_server/heels/serial/rule_32.sql b/inst/sql/sql_server/heels/serial/rule_32.sql new file mode 100644 index 00000000..57e79316 --- /dev/null +++ b/inst/sql/sql_server/heels/serial/rule_32.sql @@ -0,0 +1,22 @@ +--rule32 DQ rule +--uses iris: patients with at least one visit +--does 100 minus the IRIS measure to check for the percentage of patients with no visits + +select * +into #serial_hr_@hrNewId +FROM +( + select * from #serial_hr_@hrOldId + + union all + + select + null as analysis_id, + CAST('NOTIFICATION: Percentage of patients with no visits exceeds threshold' AS VARCHAR(255)) as achilles_heel_warning, + 32 as rule_id, + null as record_count + from #serial_rd_@rdOldId d + where d.measure_id = 'ach_2003:Percentage' + and 100 - d.statistic_value > 27 --threshold identified in the DataQuality study +) Q +; \ No newline at end of file diff --git a/inst/sql/sql_server/heels/serial/rule_33.sql b/inst/sql/sql_server/heels/serial/rule_33.sql new file mode 100644 index 00000000..0379c003 --- /dev/null +++ b/inst/sql/sql_server/heels/serial/rule_33.sql @@ -0,0 +1,22 @@ +--rule33 DQ rule (for general population only) +--NOTIFICATION: database does not have all ages 0-80 represented + + +select * +into #serial_hr_@hrNewId +from +( + select * from #serial_hr_@hrOldId + + union all + + select + null as analysis_id, + CAST('NOTIFICATION: [GeneralPopulationOnly] Not all deciles represented at first observation' AS VARCHAR(255)) as achilles_heel_warning, + 33 as rule_id, + null as record_count + FROM #serial_rd_@rdOldId 
d + where d.measure_id = 'AgeAtFirstObsByDecile:DecileCnt' + and d.statistic_value < 9 --we expect deciles 0,1,2,3,4,5,6,7,8 +) Q +; \ No newline at end of file diff --git a/inst/sql/sql_server/heels/serial/rule_34.sql b/inst/sql/sql_server/heels/serial/rule_34.sql new file mode 100644 index 00000000..1578fa53 --- /dev/null +++ b/inst/sql/sql_server/heels/serial/rule_34.sql @@ -0,0 +1,24 @@ +--rule34 DQ rule +--NOTIFICATION: number of unmapped source values exceeds threshold +--related to rule 27 that looks at percentage of unmapped rows (rows as focus) +--this rule is looking at source values (as focus) + + +select * +into #serial_hr_@hrNewId +from +( + select * from #serial_hr_@hrOldId + + union all + + select + null as analysis_id, + CAST(CONCAT('NOTIFICATION: Count of unmapped source values exceeds threshold in: ', cast(stratum_1 as varchar(100))) AS VARCHAR(255)) as ACHILLES_HEEL_warning, + 34 as rule_id, + cast(statistic_value as int) as record_count + FROM #serial_rd_@rdOldId d + where measure_id = 'UnmappedDataByDomain:SourceValueCnt' + and statistic_value > 1000 --threshold will be decided in DQ study 2 +) Q +; \ No newline at end of file diff --git a/inst/sql/sql_server/heels/serial/rule_35.sql b/inst/sql/sql_server/heels/serial/rule_35.sql new file mode 100644 index 00000000..edb9d69e --- /dev/null +++ b/inst/sql/sql_server/heels/serial/rule_35.sql @@ -0,0 +1,35 @@ +--rule35 DQ rule, NOTIFICATION +--this rule analyzes Units recorded for measurement +select * +into #serial_hr_@hrNewId +from +( + select * from #serial_hr_@hrOldId + + union all + + select + null as analysis_id, + achilles_heel_warning, + rule_id, + record_count + from + ( + SELECT + CAST('NOTIFICATION: Count of measurement_ids with more than 5 distinct units exceeds threshold' AS VARCHAR(255)) as ACHILLES_HEEL_warning, + 35 as rule_id, + cast(meas_concept_id_cnt as int) as record_count + from ( + select meas_concept_id_cnt from (select sum(freq) as meas_concept_id_cnt from + (select u_cnt, count(*) as freq from + (select stratum_1, count(*) as u_cnt + from @resultsDatabaseSchema.ACHILLES_results where analysis_id = 1807 group by stratum_1) a + group by u_cnt + ) b + where u_cnt >= 5 --threshold one for the rule + ) c + where meas_concept_id_cnt >= 10 --threshold two for the rule + ) d + ) Q +) A +; \ No newline at end of file diff --git a/inst/sql/sql_server/heels/serial/rule_36.sql b/inst/sql/sql_server/heels/serial/rule_36.sql new file mode 100644 index 00000000..944b497e --- /dev/null +++ b/inst/sql/sql_server/heels/serial/rule_36.sql @@ -0,0 +1,23 @@ +--ruleid 36 WARNING: age > 125 (related to an error grade rule 21 that has higher threshold) +select * +into #serial_hr_@hrNewId +from +( + select * from #serial_hr_@hrOldId + + union all + + SELECT or1.analysis_id, + CAST(CONCAT('WARNING: ', cast(or1.analysis_id as VARCHAR), '-', oa1.analysis_name, '; should not have age > @ThresholdAgeWarning, (n=', cast(sum(or1.count_value) as VARCHAR(19)), ')') AS VARCHAR(255)) AS ACHILLES_HEEL_warning, + 36 as rule_id, + sum(or1.count_value) as record_count + FROM @resultsDatabaseSchema.ACHILLES_results or1 + INNER JOIN @resultsDatabaseSchema.ACHILLES_analysis oa1 + ON or1.analysis_id = oa1.analysis_id + WHERE or1.analysis_id IN (101) + AND CAST(or1.stratum_1 AS INT) > @ThresholdAgeWarning + AND or1.count_value > 0 + GROUP BY or1.analysis_id, + oa1.analysis_name +) Q +; \ No newline at end of file diff --git a/inst/sql/sql_server/heels/serial/rule_37.sql b/inst/sql/sql_server/heels/serial/rule_37.sql new file mode 100644 
index 00000000..847b1235 --- /dev/null +++ b/inst/sql/sql_server/heels/serial/rule_37.sql @@ -0,0 +1,49 @@ +--ruleid 37 DQ rule + +--derived measure for this rule - ratio of notes over the number of visits + +select * into #serial_rd_@rdNewId +from +( + select * from #serial_rd_@rdOldId + + union all + + SELECT + null as analysis_id, + null as stratum_1, + null as stratum_2, + CAST(1.0*c1.all_notes/(1.0*c2.all_visits) AS FLOAT) as statistic_value, + CAST( 'Note:NoteVisitRatio' AS VARCHAR(255)) as measure_id + FROM (SELECT sum(count_value) as all_notes FROM @resultsDatabaseSchema.achilles_results r WHERE analysis_id =2201 ) c1 + CROSS JOIN (SELECT sum(count_value) as all_visits FROM @resultsDatabaseSchema.achilles_results r WHERE analysis_id =201 ) c2 +) A +; + +--one co-author of the DataQuality study suggested measuring data density on visit level (in addition to +-- patient and dataset level) +--Assumption is that at least one data event (e.g., diagnosis, note) is generated for each visit +--this rule is testing that at least some notes exist (considering the number of visits) +--for datasets with zero notes the derived measure is null and the rule does not fire at all +--possible elaborations of this rule include the number of inpatient notes given the number of inpatient visits +--current rule is on overall data density (for notes only) per visit level + + +select * +into #serial_hr_@hrNewId +from +( + select * from #serial_hr_@hrOldId + + union all + + select + null as analysis_id, + CAST('NOTIFICATION: Notes data density is below threshold' AS VARCHAR(255)) as ACHILLES_HEEL_warning, + 37 as rule_id, + cast(statistic_value as int) as record_count + FROM #serial_rd_@rdNewId d + where measure_id = 'Note:NoteVisitRatio' + and statistic_value < 0.01 --threshold will be decided in DataQuality study +) Q +; diff --git a/inst/sql/sql_server/heels/serial/rule_38.sql b/inst/sql/sql_server/heels/serial/rule_38.sql new file mode 100644 index 00000000..8ad96f65 --- /dev/null +++ b/inst/sql/sql_server/heels/serial/rule_38.sql @@ -0,0 +1,23 @@ +--ruleid 38 DQ rule; in a general dataset, it is expected that providers with a wide range of specialties +--(at least more than just one specialty) are present +--notification may indicate that the provider table is missing data on specialty +--a typical dataset has at least 28 specialties present in the provider table + +select * +into #serial_hr_@hrNewId +from +( + select * from #serial_hr_@hrOldId + + union all + + select + null as analysis_id, + CAST('NOTIFICATION: [GeneralPopulationOnly] Count of distinct specialties of providers in the PROVIDER table is below threshold' AS VARCHAR(255)) as ACHILLES_HEEL_warning, + 38 as rule_id, + cast(statistic_value as int) as record_count + FROM #serial_rd_@rdOldId d + where measure_id = 'Provider:SpecialtyCnt' + and statistic_value < 2 --DataQuality data indicate median of 55 specialties (percentile25 is 28; percentile10 is 2) +) Q +; diff --git a/inst/sql/sql_server/heels/serial/rule_39.sql b/inst/sql/sql_server/heels/serial/rule_39.sql new file mode 100644 index 00000000..a8f683a4 --- /dev/null +++ b/inst/sql/sql_server/heels/serial/rule_39.sql @@ -0,0 +1,27 @@ +--ruleid 39 DQ rule; Given the lifetime record DQ assumption, if more than 30k patients are born for every deceased patient +--the dataset may not be recording complete records for all senior patients in that year +--the derived ratio measure Death:BornDeceasedRatio only exists for years where death data exist +--to avoid alerting on too early years such as 1925 where births 
exist but no deaths + +select * +into #serial_hr_@hrNewId +from +( + select * from #serial_hr_@hrOldId + + union all + + select + null as analysis_id, + CAST('NOTIFICATION: [GeneralPopulationOnly] In some years, number of deaths is too low considering the number of births (lifetime record DQ assumption)' AS VARCHAR(255)) as achilles_heel_warning, + 39 as rule_id, + year_cnt as record_count + from + ( + select count(*) as year_cnt + from #serial_rd_@rdOldId + where measure_id = 'Death:BornDeceasedRatio' and statistic_value > 30000 + ) a + where a.year_cnt > 0 +) Q +; \ No newline at end of file diff --git a/inst/sql/sql_server/heels/serial/rule_40.sql b/inst/sql/sql_server/heels/serial/rule_40.sql new file mode 100644 index 00000000..467aea99 --- /dev/null +++ b/inst/sql/sql_server/heels/serial/rule_40.sql @@ -0,0 +1,21 @@ +--ruleid 40 this rule was under umbrella rule 1 and was made into a separate rule + +select * +into #serial_hr_@hrNewId +from +( + select * from #serial_hr_@hrOldId + + union all + + SELECT DISTINCT or1.analysis_id, + CAST(CONCAT('ERROR: Death event outside observation period, ', cast(or1.analysis_id as VARCHAR(10)), '-', oa1.analysis_name, '; count (n=', cast(or1.count_value as VARCHAR), ') should not be > 0') AS VARCHAR(255)) AS ACHILLES_HEEL_warning, + 40 as rule_id, + or1.count_value as record_count + FROM @resultsDatabaseSchema.ACHILLES_results or1 + INNER JOIN @resultsDatabaseSchema.ACHILLES_analysis oa1 + ON or1.analysis_id = oa1.analysis_id + WHERE or1.analysis_id IN (510) + AND or1.count_value > 0 +) Q +; \ No newline at end of file diff --git a/inst/sql/sql_server/heels/serial/rule_41.sql b/inst/sql/sql_server/heels/serial/rule_41.sql new file mode 100644 index 00000000..bb6fb3d0 --- /dev/null +++ b/inst/sql/sql_server/heels/serial/rule_41.sql @@ -0,0 +1,26 @@ +--ruleid 41 DQ rule, data density +--porting a Sentinel rule that checks for certain vital signs data (weight, in this case) +--multiple concepts_ids may be added to broaden the rule, however standardizing on a single +--concept would be more optimal +select * +into #serial_hr_@hrNewId +from +( + select * from #serial_hr_@hrOldId + + union all + + select + null as analysis_id, + CAST('NOTIFICATION:No body weight data in MEASUREMENT table (under concept_id 3025315 (LOINC code 29463-7))' AS VARCHAR(255)) as achilles_heel_warning, + 41 as rule_id, + null as record_count + from + ( + select count(*) as row_present + from @resultsDatabaseSchema.ACHILLES_results + where analysis_id = 1800 and stratum_1 = '3025315' + ) a + where a.row_present = 0 +) Q +; \ No newline at end of file diff --git a/inst/sql/sql_server/heels/serial/rule_42.sql b/inst/sql/sql_server/heels/serial/rule_42.sql new file mode 100644 index 00000000..5869d503 --- /dev/null +++ b/inst/sql/sql_server/heels/serial/rule_42.sql @@ -0,0 +1,29 @@ +--ruleid 42 DQ rule +--Percentage of outpatient visits (concept_id 9202) is too low (for general population). 
+--This may indicate a dataset with mostly inpatient data (that may be biased and missing some EHR events) +--Threshold was decided as 10th percentile in empiric comparison of 12 real world datasets in the DQ-Study2 + +select * +into #serial_hr_@hrNewId +from +( + select * from #serial_hr_@hrOldId + + union all + + select + null as analysis_id, + CAST('NOTIFICATION: [GeneralPopulationOnly] Percentage of outpatient visits is below threshold' AS VARCHAR(255)) as achilles_heel_warning, + 42 as rule_id, + null as record_count + from + ( + select + 1.0*achilles_results.count_value/c1.count_value as outp_perc + from @resultsDatabaseSchema.achilles_results + cross join (select sum(count_value) as count_value from @resultsDatabaseSchema.achilles_results where analysis_id = 201) c1 + where analysis_id = 201 and stratum_1='9202' + ) d + where d.outp_perc < @ThresholdOutpatientVisitPerc +) Q +; \ No newline at end of file diff --git a/inst/sql/sql_server/heels/serial/rule_43.sql b/inst/sql/sql_server/heels/serial/rule_43.sql new file mode 100644 index 00000000..9892bed3 --- /dev/null +++ b/inst/sql/sql_server/heels/serial/rule_43.sql @@ -0,0 +1,29 @@ +--ruleid 43 DQ rule +--looks at observation period data, if all patients have exactly one the rule alerts the user +--This rule is based on majority of real life datasets. +--For some datasets (e.g., UK national data with single payor, one observation period is perfectly valid) + +select * +into #serial_hr_@hrNewId +from +( + select * from #serial_hr_@hrOldId + + union all + + select + null as analysis_id, + CAST('NOTIFICATION: 99+ percent of persons have exactly one observation period' AS VARCHAR(255)) as achilles_heel_warning, + 43 as rule_id, + null as record_count + from + ( + select + 100.0*achilles_results.count_value/ct.total_pts as one_obs_per_perc + from @resultsDatabaseSchema.achilles_results + cross join (select count_value as total_pts from @resultsDatabaseSchema.achilles_results r where analysis_id =1) as ct + where analysis_id = 113 and stratum_1 = '1' + ) d + where d.one_obs_per_perc >= 99.0 +) Q +; \ No newline at end of file diff --git a/inst/sql/sql_server/heels/serial/rule_44.sql b/inst/sql/sql_server/heels/serial/rule_44.sql new file mode 100644 index 00000000..f7fe6611 --- /dev/null +++ b/inst/sql/sql_server/heels/serial/rule_44.sql @@ -0,0 +1,21 @@ +--ruleid 44 DQ rule +--uses iris measure: patients with at least 1 Meas, 1 Dx and 1 Rx + +select * +into #serial_hr_@hrNewId +from +( + select * from #serial_hr_@hrOldId + + union all + + SELECT + null as analysis_id, + CAST('NOTIFICATION: Percentage of patients with at least 1 Measurement, 1 Dx and 1 Rx is below threshold' AS VARCHAR(255)) as ACHILLES_HEEL_warning, + 44 as rule_id, + null as record_count + FROM #serial_rd_@rdOldId d + where d.measure_id = 'ach_2002:Percentage' + and d.statistic_value < @ThresholdMinimalPtMeasDxRx --threshold identified in the DataQuality study +) Q +; \ No newline at end of file diff --git a/inst/sql/sql_server/Achilles_indices_v5.sql b/inst/sql/sql_server/post_processing/achilles_indices.sql old mode 100644 new mode 100755 similarity index 66% rename from inst/sql/sql_server/Achilles_indices_v5.sql rename to inst/sql/sql_server/post_processing/achilles_indices.sql index 766d515b..e25264b0 --- a/inst/sql/sql_server/Achilles_indices_v5.sql +++ b/inst/sql/sql_server/post_processing/achilles_indices.sql @@ -1,8 +1,8 @@ /****************************************************************** -# @file ACHILLES_indices_v5.SQL +# @file ACHILLES_indices.SQL # -# 
Copyright 2017 Observational Health Data Sciences and Informatics +# Copyright 2018 Observational Health Data Sciences and Informatics # # This file is part of ACHILLES # @@ -36,32 +36,32 @@ Achilles - indices for tables {@is_pdw}?{ CREATE CLUSTERED COLUMNSTORE INDEX ClusteredIndex_Achilles_results - ON @results_database_schema.ACHILLES_results; + ON @resultsDatabaseSchema.ACHILLES_results; } CREATE INDEX idx_ar_aid - ON @results_database_schema.ACHILLES_results (analysis_id); + ON @resultsDatabaseSchema.ACHILLES_results (analysis_id); CREATE INDEX idx_ar_s1 - ON @results_database_schema.ACHILLES_results (stratum_1); + ON @resultsDatabaseSchema.ACHILLES_results (stratum_1); CREATE INDEX idx_ar_s2 - ON @results_database_schema.ACHILLES_results (stratum_2); + ON @resultsDatabaseSchema.ACHILLES_results (stratum_2); CREATE INDEX idx_ar_aid_s1 - ON @results_database_schema.ACHILLES_results (analysis_id, stratum_1); + ON @resultsDatabaseSchema.ACHILLES_results (analysis_id, stratum_1); CREATE INDEX idx_ar_aid_s1234 - ON @results_database_schema.ACHILLES_results (analysis_id, stratum_1, stratum_2, stratum_3, stratum_4); + ON @resultsDatabaseSchema.ACHILLES_results (analysis_id, stratum_1, stratum_2, stratum_3, stratum_4); CREATE INDEX idx_ard_aid - ON @results_database_schema.ACHILLES_results_dist (analysis_id); + ON @resultsDatabaseSchema.ACHILLES_results_dist (analysis_id); CREATE INDEX idx_ard_s1 - ON @results_database_schema.ACHILLES_results_dist (stratum_1); + ON @resultsDatabaseSchema.ACHILLES_results_dist (stratum_1); CREATE INDEX idx_ard_s2 - ON @results_database_schema.ACHILLES_results_dist (stratum_2); + ON @resultsDatabaseSchema.ACHILLES_results_dist (stratum_2); /*********************************************/ /***** Index the hierarchy lookup table *****/ /*********************************************/ CREATE INDEX idx_ch_cid - ON @results_database_schema.concept_hierarchy (concept_id); + ON @resultsDatabaseSchema.concept_hierarchy (concept_id); CREATE INDEX idx_ch_tmap - ON @results_database_schema.concept_hierarchy (treemap); + ON @resultsDatabaseSchema.concept_hierarchy (treemap); diff --git a/inst/sql/sql_server/post_processing/concept_hierarchies/condition.sql b/inst/sql/sql_server/post_processing/concept_hierarchies/condition.sql new file mode 100755 index 00000000..46dfce8f --- /dev/null +++ b/inst/sql/sql_server/post_processing/concept_hierarchies/condition.sql @@ -0,0 +1,102 @@ +/********** CONDITION/CONDITION_ERA **********/ + +--HINT DISTRIBUTE_ON_KEY(concept_id) + SELECT + snomed.concept_id, + snomed.concept_name AS concept_name, + 'Condition' AS treemap, + null as concept_hierarchy_type, + pt_to_hlt.pt_concept_name as level1_concept_name, + hlt_to_hlgt.hlt_concept_name as level2_concept_name, + hlgt_to_soc.hlgt_concept_name as level3_concept_name, + soc.concept_name AS level4_concept_name +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_ch_condition + FROM + ( + SELECT + concept_id, + concept_name + FROM @vocabDatabaseSchema.concept + WHERE domain_id = 'Condition' + ) snomed + LEFT JOIN + (SELECT + c1.concept_id AS snomed_concept_id, + max(c2.concept_id) AS pt_concept_id + FROM + @vocabDatabaseSchema.concept c1 + INNER JOIN + @vocabDatabaseSchema.concept_ancestor ca1 + ON c1.concept_id = ca1.descendant_concept_id + AND c1.domain_id = 'Condition' + AND ca1.min_levels_of_separation = 1 + INNER JOIN + @vocabDatabaseSchema.concept c2 + ON ca1.ancestor_concept_id = c2.concept_id + AND c2.vocabulary_id = 'MedDRA' + GROUP BY c1.concept_id + ) snomed_to_pt + ON 
snomed.concept_id = snomed_to_pt.snomed_concept_id + + LEFT JOIN + (SELECT + c1.concept_id AS pt_concept_id, + c1.concept_name AS pt_concept_name, + max(c2.concept_id) AS hlt_concept_id + FROM + @vocabDatabaseSchema.concept c1 + INNER JOIN + @vocabDatabaseSchema.concept_ancestor ca1 + ON c1.concept_id = ca1.descendant_concept_id + AND c1.vocabulary_id = 'MedDRA' + AND ca1.min_levels_of_separation = 1 + INNER JOIN + @vocabDatabaseSchema.concept c2 + ON ca1.ancestor_concept_id = c2.concept_id + AND c2.vocabulary_id = 'MedDRA' + GROUP BY c1.concept_id, c1.concept_name + ) pt_to_hlt + ON snomed_to_pt.pt_concept_id = pt_to_hlt.pt_concept_id + + LEFT JOIN + (SELECT + c1.concept_id AS hlt_concept_id, + c1.concept_name AS hlt_concept_name, + max(c2.concept_id) AS hlgt_concept_id + FROM + @vocabDatabaseSchema.concept c1 + INNER JOIN + @vocabDatabaseSchema.concept_ancestor ca1 + ON c1.concept_id = ca1.descendant_concept_id + AND c1.vocabulary_id = 'MedDRA' + AND ca1.min_levels_of_separation = 1 + INNER JOIN + @vocabDatabaseSchema.concept c2 + ON ca1.ancestor_concept_id = c2.concept_id + AND c2.vocabulary_id = 'MedDRA' + GROUP BY c1.concept_id, c1.concept_name + ) hlt_to_hlgt + ON pt_to_hlt.hlt_concept_id = hlt_to_hlgt.hlt_concept_id + + LEFT JOIN + (SELECT + c1.concept_id AS hlgt_concept_id, + c1.concept_name AS hlgt_concept_name, + max(c2.concept_id) AS soc_concept_id + FROM + @vocabDatabaseSchema.concept c1 + INNER JOIN + @vocabDatabaseSchema.concept_ancestor ca1 + ON c1.concept_id = ca1.descendant_concept_id + AND c1.vocabulary_id = 'MedDRA' + AND ca1.min_levels_of_separation = 1 + INNER JOIN + @vocabDatabaseSchema.concept c2 + ON ca1.ancestor_concept_id = c2.concept_id + AND c2.vocabulary_id = 'MedDRA' + GROUP BY c1.concept_id, c1.concept_name + ) hlgt_to_soc + ON hlt_to_hlgt.hlgt_concept_id = hlgt_to_soc.hlgt_concept_id + + LEFT JOIN @vocabDatabaseSchema.concept soc + ON hlgt_to_soc.soc_concept_id = soc.concept_id; diff --git a/inst/sql/sql_server/post_processing/concept_hierarchies/drug.sql b/inst/sql/sql_server/post_processing/concept_hierarchies/drug.sql new file mode 100755 index 00000000..141314ed --- /dev/null +++ b/inst/sql/sql_server/post_processing/concept_hierarchies/drug.sql @@ -0,0 +1,93 @@ +/********** DRUG **********/ + +--HINT DISTRIBUTE_ON_KEY(concept_id) + SELECT + rxnorm.concept_id, + rxnorm.concept_name AS concept_name, + 'Drug' AS treemap, + null as concept_hierarchy_type, + rxnorm.rxnorm_ingredient_concept_name as level1_concept_name, + atc5_to_atc3.atc5_concept_name as level2_concept_name, + atc3_to_atc1.atc3_concept_name as level3_concept_name, + atc1.concept_name AS level4_concept_name +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_ch_drug + FROM + ( + SELECT + c1.concept_id, + c1.concept_name, + c2.concept_id AS rxnorm_ingredient_concept_id, + c2.concept_name AS RxNorm_ingredient_concept_name + FROM @vocabDatabaseSchema.concept c1 + INNER JOIN @vocabDatabaseSchema.concept_ancestor ca1 + ON c1.concept_id = ca1.descendant_concept_id + AND c1.domain_id = 'Drug' + INNER JOIN @vocabDatabaseSchema.concept c2 + ON ca1.ancestor_concept_id = c2.concept_id + AND c2.domain_id = 'Drug' + AND c2.concept_class_id = 'Ingredient' + ) rxnorm + LEFT JOIN + (SELECT + c1.concept_id AS rxnorm_ingredient_concept_id, + max(c2.concept_id) AS atc5_concept_id + FROM + @vocabDatabaseSchema.concept c1 + INNER JOIN + @vocabDatabaseSchema.concept_ancestor ca1 + ON c1.concept_id = ca1.descendant_concept_id + AND c1.domain_id = 'Drug' + AND c1.concept_class_id = 'Ingredient' + INNER JOIN 
+ @vocabDatabaseSchema.concept c2 + ON ca1.ancestor_concept_id = c2.concept_id + AND c2.vocabulary_id = 'ATC' + AND c2.concept_class_id = 'ATC 4th' + GROUP BY c1.concept_id + ) rxnorm_to_atc5 + ON rxnorm.rxnorm_ingredient_concept_id = rxnorm_to_atc5.rxnorm_ingredient_concept_id + + LEFT JOIN + (SELECT + c1.concept_id AS atc5_concept_id, + c1.concept_name AS atc5_concept_name, + max(c2.concept_id) AS atc3_concept_id + FROM + @vocabDatabaseSchema.concept c1 + INNER JOIN + @vocabDatabaseSchema.concept_ancestor ca1 + ON c1.concept_id = ca1.descendant_concept_id + AND c1.vocabulary_id = 'ATC' + AND c1.concept_class_id = 'ATC 4th' + INNER JOIN + @vocabDatabaseSchema.concept c2 + ON ca1.ancestor_concept_id = c2.concept_id + AND c2.vocabulary_id = 'ATC' + AND c2.concept_class_id = 'ATC 2nd' + GROUP BY c1.concept_id, c1.concept_name + ) atc5_to_atc3 + ON rxnorm_to_atc5.atc5_concept_id = atc5_to_atc3.atc5_concept_id + + LEFT JOIN + (SELECT + c1.concept_id AS atc3_concept_id, + c1.concept_name AS atc3_concept_name, + max(c2.concept_id) AS atc1_concept_id + FROM + @vocabDatabaseSchema.concept c1 + INNER JOIN + @vocabDatabaseSchema.concept_ancestor ca1 + ON c1.concept_id = ca1.descendant_concept_id + AND c1.vocabulary_id = 'ATC' + AND c1.concept_class_id = 'ATC 2nd' + INNER JOIN + @vocabDatabaseSchema.concept c2 + ON ca1.ancestor_concept_id = c2.concept_id + AND c2.vocabulary_id = 'ATC' + AND c2.concept_class_id = 'ATC 1st' + GROUP BY c1.concept_id, c1.concept_name + ) atc3_to_atc1 + ON atc5_to_atc3.atc3_concept_id = atc3_to_atc1.atc3_concept_id + + LEFT JOIN @vocabDatabaseSchema.concept atc1 + ON atc3_to_atc1.atc1_concept_id = atc1.concept_id; \ No newline at end of file diff --git a/inst/sql/sql_server/post_processing/concept_hierarchies/drug_era.sql b/inst/sql/sql_server/post_processing/concept_hierarchies/drug_era.sql new file mode 100755 index 00000000..ee6c5b28 --- /dev/null +++ b/inst/sql/sql_server/post_processing/concept_hierarchies/drug_era.sql @@ -0,0 +1,88 @@ +/********** DRUG_ERA **********/ + +--HINT DISTRIBUTE_ON_KEY(concept_id) + SELECT + rxnorm.rxnorm_ingredient_concept_id as concept_id, + rxnorm.rxnorm_ingredient_concept_name as concept_name, + 'Drug Era' AS treemap, + null as concept_hierarchy_type, + atc5_to_atc3.atc5_concept_name as level1_concept_name, + atc3_to_atc1.atc3_concept_name as level2_concept_name, + atc1.concept_name as level3_concept_name, + null as level4_concept_name +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_ch_drug_era + FROM + ( + SELECT + c2.concept_id AS rxnorm_ingredient_concept_id, + c2.concept_name AS RxNorm_ingredient_concept_name + FROM + @vocabDatabaseSchema.concept c2 + WHERE + c2.domain_id = 'Drug' + AND c2.concept_class_id = 'Ingredient' + ) rxnorm + LEFT JOIN + (SELECT + c1.concept_id AS rxnorm_ingredient_concept_id, + max(c2.concept_id) AS atc5_concept_id + FROM + @vocabDatabaseSchema.concept c1 + INNER JOIN + @vocabDatabaseSchema.concept_ancestor ca1 + ON c1.concept_id = ca1.descendant_concept_id + AND c1.domain_id = 'Drug' + AND c1.concept_class_id = 'Ingredient' + INNER JOIN + @vocabDatabaseSchema.concept c2 + ON ca1.ancestor_concept_id = c2.concept_id + AND c2.vocabulary_id = 'ATC' + AND c2.concept_class_id = 'ATC 4th' + GROUP BY c1.concept_id + ) rxnorm_to_atc5 + ON rxnorm.rxnorm_ingredient_concept_id = rxnorm_to_atc5.rxnorm_ingredient_concept_id + + LEFT JOIN + (SELECT + c1.concept_id AS atc5_concept_id, + c1.concept_name AS atc5_concept_name, + max(c2.concept_id) AS atc3_concept_id + FROM + @vocabDatabaseSchema.concept c1 + 
INNER JOIN + @vocabDatabaseSchema.concept_ancestor ca1 + ON c1.concept_id = ca1.descendant_concept_id + AND c1.vocabulary_id = 'ATC' + AND c1.concept_class_id = 'ATC 4th' + INNER JOIN + @vocabDatabaseSchema.concept c2 + ON ca1.ancestor_concept_id = c2.concept_id + AND c2.vocabulary_id = 'ATC' + AND c2.concept_class_id = 'ATC 2nd' + GROUP BY c1.concept_id, c1.concept_name + ) atc5_to_atc3 + ON rxnorm_to_atc5.atc5_concept_id = atc5_to_atc3.atc5_concept_id + + LEFT JOIN + (SELECT + c1.concept_id AS atc3_concept_id, + c1.concept_name AS atc3_concept_name, + max(c2.concept_id) AS atc1_concept_id + FROM + @vocabDatabaseSchema.concept c1 + INNER JOIN + @vocabDatabaseSchema.concept_ancestor ca1 + ON c1.concept_id = ca1.descendant_concept_id + AND c1.vocabulary_id = 'ATC' + AND c1.concept_class_id = 'ATC 2nd' + INNER JOIN + @vocabDatabaseSchema.concept c2 + ON ca1.ancestor_concept_id = c2.concept_id + AND c2.vocabulary_id = 'ATC' + AND c2.concept_class_id = 'ATC 1st' + GROUP BY c1.concept_id, c1.concept_name + ) atc3_to_atc1 + ON atc5_to_atc3.atc3_concept_id = atc3_to_atc1.atc3_concept_id + + LEFT JOIN @vocabDatabaseSchema.concept atc1 + ON atc3_to_atc1.atc1_concept_id = atc1.concept_id; diff --git a/inst/sql/sql_server/post_processing/concept_hierarchies/measurement.sql b/inst/sql/sql_server/post_processing/concept_hierarchies/measurement.sql new file mode 100755 index 00000000..5e97fc8c --- /dev/null +++ b/inst/sql/sql_server/post_processing/concept_hierarchies/measurement.sql @@ -0,0 +1,31 @@ +/********** MEASUREMENT **********/ + +--HINT DISTRIBUTE_ON_KEY(concept_id) + SELECT + m.concept_id, + m.concept_name, + 'Measurement' AS treemap, + null as concept_hierarchy_type, + max(c1.concept_name) AS level1_concept_name, + max(c2.concept_name) AS level2_concept_name, + max(c3.concept_name) AS level3_concept_name, + null as level4_concept_name +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_ch_measurement + FROM + ( + SELECT DISTINCT + concept_id, + concept_name + FROM @vocabDatabaseSchema.concept c + WHERE domain_id = 'Measurement' + ) m + LEFT JOIN @vocabDatabaseSchema.concept_ancestor ca1 + ON M.concept_id = ca1.DESCENDANT_CONCEPT_ID AND ca1.min_levels_of_separation = 1 + LEFT JOIN @vocabDatabaseSchema.concept c1 ON ca1.ANCESTOR_CONCEPT_ID = c1.concept_id + LEFT JOIN @vocabDatabaseSchema.concept_ancestor ca2 + ON c1.concept_id = ca2.DESCENDANT_CONCEPT_ID AND ca2.min_levels_of_separation = 1 + LEFT JOIN @vocabDatabaseSchema.concept c2 ON ca2.ANCESTOR_CONCEPT_ID = c2.concept_id + LEFT JOIN @vocabDatabaseSchema.concept_ancestor ca3 + ON c2.concept_id = ca3.DESCENDANT_CONCEPT_ID AND ca3.min_levels_of_separation = 1 + LEFT JOIN @vocabDatabaseSchema.concept c3 ON ca3.ANCESTOR_CONCEPT_ID = c3.concept_id + GROUP BY M.concept_id, M.concept_name; \ No newline at end of file diff --git a/inst/sql/sql_server/post_processing/concept_hierarchies/observation.sql b/inst/sql/sql_server/post_processing/concept_hierarchies/observation.sql new file mode 100755 index 00000000..78646439 --- /dev/null +++ b/inst/sql/sql_server/post_processing/concept_hierarchies/observation.sql @@ -0,0 +1,31 @@ +/********** OBSERVATION **********/ + +--HINT DISTRIBUTE_ON_KEY(concept_id) + SELECT + obs.concept_id, + obs.concept_name, + 'Observation' AS treemap, + null as concept_hierarchy_type, + max(c1.concept_name) AS level1_concept_name, + max(c2.concept_name) AS level2_concept_name, + max(c3.concept_name) AS level3_concept_name, + null as level4_concept_name +into 
@scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_ch_observation + FROM + ( + SELECT + concept_id, + concept_name + FROM @vocabDatabaseSchema.concept + WHERE domain_id = 'Observation' + ) obs + LEFT JOIN @vocabDatabaseSchema.concept_ancestor ca1 + ON obs.concept_id = ca1.DESCENDANT_CONCEPT_ID AND ca1.min_levels_of_separation = 1 + LEFT JOIN @vocabDatabaseSchema.concept c1 ON ca1.ANCESTOR_CONCEPT_ID = c1.concept_id + LEFT JOIN @vocabDatabaseSchema.concept_ancestor ca2 + ON c1.concept_id = ca2.DESCENDANT_CONCEPT_ID AND ca2.min_levels_of_separation = 1 + LEFT JOIN @vocabDatabaseSchema.concept c2 ON ca2.ANCESTOR_CONCEPT_ID = c2.concept_id + LEFT JOIN @vocabDatabaseSchema.concept_ancestor ca3 + ON c2.concept_id = ca3.DESCENDANT_CONCEPT_ID AND ca3.min_levels_of_separation = 1 + LEFT JOIN @vocabDatabaseSchema.concept c3 ON ca3.ANCESTOR_CONCEPT_ID = c3.concept_id + GROUP BY obs.concept_id, obs.concept_name; \ No newline at end of file diff --git a/inst/sql/sql_server/post_processing/concept_hierarchies/procedure.sql b/inst/sql/sql_server/post_processing/concept_hierarchies/procedure.sql new file mode 100755 index 00000000..db2a17e5 --- /dev/null +++ b/inst/sql/sql_server/post_processing/concept_hierarchies/procedure.sql @@ -0,0 +1,114 @@ +/********** PROCEDURE **********/ + +--HINT DISTRIBUTE_ON_KEY(concept_id) + SELECT + procs.concept_id, + procs.proc_concept_name as concept_name, + 'Procedure' AS treemap, + null as concept_hierarchy_type, + max(proc_hierarchy.os3_concept_name) AS level1_concept_name, + max(proc_hierarchy.os2_concept_name) AS level2_concept_name, + max(proc_hierarchy.os1_concept_name) AS level3_concept_name, + null as level4_concept_name +into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_ch_procedure + FROM + ( + SELECT + c1.concept_id, + v1.vocabulary_name + ' ' + c1.concept_code + ': ' + c1.concept_name AS proc_concept_name + FROM @vocabDatabaseSchema.concept c1 + INNER JOIN @vocabDatabaseSchema.vocabulary v1 + ON c1.vocabulary_id = v1.vocabulary_id + WHERE c1.domain_id = 'Procedure' + ) procs + LEFT JOIN + (SELECT + ca0.DESCENDANT_CONCEPT_ID, + max(ca0.ancestor_concept_id) AS ancestor_concept_id + FROM @vocabDatabaseSchema.concept_ancestor ca0 + INNER JOIN + (SELECT DISTINCT c2.concept_id AS os3_concept_id + FROM @vocabDatabaseSchema.concept_ancestor ca1 + INNER JOIN + @vocabDatabaseSchema.concept c1 + ON ca1.DESCENDANT_CONCEPT_ID = c1.concept_id + INNER JOIN + @vocabDatabaseSchema.concept_ancestor ca2 + ON c1.concept_id = ca2.ANCESTOR_CONCEPT_ID + INNER JOIN + @vocabDatabaseSchema.concept c2 + ON ca2.DESCENDANT_CONCEPT_ID = c2.concept_id + WHERE ca1.ancestor_concept_id = 4040390 + AND ca1.Min_LEVELS_OF_SEPARATION = 2 + AND ca2.MIN_LEVELS_OF_SEPARATION = 1 + ) t1 + ON ca0.ANCESTOR_CONCEPT_ID = t1.os3_concept_id + GROUP BY ca0.descendant_concept_id + ) ca1 + ON procs.concept_id = ca1.DESCENDANT_CONCEPT_ID + LEFT JOIN + ( + SELECT + proc_by_os1.os1_concept_name, + proc_by_os2.os2_concept_name, + proc_by_os3.os3_concept_name, + proc_by_os3.os3_concept_id + FROM + (SELECT + DESCENDANT_CONCEPT_ID AS os1_concept_id, + concept_name AS os1_concept_name + FROM @vocabDatabaseSchema.concept_ancestor ca1 + INNER JOIN + @vocabDatabaseSchema.concept c1 + ON ca1.DESCENDANT_CONCEPT_ID = c1.concept_id + WHERE ancestor_concept_id = 4040390 + AND Min_LEVELS_OF_SEPARATION = 1 + ) proc_by_os1 + + INNER JOIN + (SELECT + max(c1.CONCEPT_ID) AS os1_concept_id, + c2.concept_id AS os2_concept_id, + c2.concept_name AS os2_concept_name + FROM @vocabDatabaseSchema.concept_ancestor ca1 + INNER 
JOIN + @vocabDatabaseSchema.concept c1 + ON ca1.DESCENDANT_CONCEPT_ID = c1.concept_id + INNER JOIN + @vocabDatabaseSchema.concept_ancestor ca2 + ON c1.concept_id = ca2.ANCESTOR_CONCEPT_ID + INNER JOIN + @vocabDatabaseSchema.concept c2 + ON ca2.DESCENDANT_CONCEPT_ID = c2.concept_id + WHERE ca1.ancestor_concept_id = 4040390 + AND ca1.Min_LEVELS_OF_SEPARATION = 1 + AND ca2.MIN_LEVELS_OF_SEPARATION = 1 + GROUP BY c2.concept_id, c2.concept_name + ) proc_by_os2 + ON proc_by_os1.os1_concept_id = proc_by_os2.os1_concept_id + + INNER JOIN + (SELECT + max(c1.CONCEPT_ID) AS os2_concept_id, + c2.concept_id AS os3_concept_id, + c2.concept_name AS os3_concept_name + FROM @vocabDatabaseSchema.concept_ancestor ca1 + INNER JOIN + @vocabDatabaseSchema.concept c1 + ON ca1.DESCENDANT_CONCEPT_ID = c1.concept_id + INNER JOIN + @vocabDatabaseSchema.concept_ancestor ca2 + ON c1.concept_id = ca2.ANCESTOR_CONCEPT_ID + INNER JOIN + @vocabDatabaseSchema.concept c2 + ON ca2.DESCENDANT_CONCEPT_ID = c2.concept_id + WHERE ca1.ancestor_concept_id = 4040390 + AND ca1.Min_LEVELS_OF_SEPARATION = 2 + AND ca2.MIN_LEVELS_OF_SEPARATION = 1 + GROUP BY c2.concept_id, c2.concept_name + ) proc_by_os3 + ON proc_by_os2.os2_concept_id = proc_by_os3.os2_concept_id + ) proc_hierarchy + ON ca1.ancestor_concept_id = proc_hierarchy.os3_concept_id + GROUP BY procs.concept_id, + procs.proc_concept_name; diff --git a/inst/sql/sql_server/ConceptHierarchy_v5.sql b/inst/sql/sql_server/post_processing/concept_hierarchy.sql old mode 100644 new mode 100755 similarity index 55% rename from inst/sql/sql_server/ConceptHierarchy_v5.sql rename to inst/sql/sql_server/post_processing/concept_hierarchy.sql index c0760276..63abc525 --- a/inst/sql/sql_server/ConceptHierarchy_v5.sql +++ b/inst/sql/sql_server/post_processing/concept_hierarchy.sql @@ -1,44 +1,36 @@ -{DEFAULT @results_database_schema = 'webapi.dbo'} -{DEFAULT @vocab_database_schema = 'omopcdm.dbo'} +{DEFAULT @resultsDatabaseSchema = 'webapi.dbo'} +{DEFAULT @vocabDatabaseSchema = 'omopcdm.dbo'} /*********************************************************************/ /***** Create hierarchy lookup table for the treemap hierarchies *****/ /*********************************************************************/ -IF OBJECT_ID('@results_database_schema.concept_hierarchy', 'U') IS NOT NULL - DROP TABLE @results_database_schema.concept_hierarchy; +IF OBJECT_ID('@resultsDatabaseSchema.concept_hierarchy', 'U') IS NOT NULL + DROP TABLE @resultsDatabaseSchema.concept_hierarchy; -CREATE TABLE @results_database_schema.concept_hierarchy -( - concept_id INT, - concept_name VARCHAR(400), - treemap VARCHAR(20), - concept_hierarchy_type VARCHAR(20), - level1_concept_name VARCHAR(255), - level2_concept_name VARCHAR(255), - level3_concept_name VARCHAR(255), - level4_concept_name VARCHAR(255) -); /***********************************************************/ /***** Populate the hierarchy lookup table per treemap *****/ /***********************************************************/ /********** CONDITION/CONDITION_ERA **********/ -INSERT INTO @results_database_schema.concept_hierarchy -(concept_id, concept_name, treemap, level1_concept_name, level2_concept_name, level3_concept_name, level4_concept_name) + + SELECT snomed.concept_id, - CAST(snomed.concept_name AS VARCHAR(400)) AS snomed_concept_name, - CAST('Condition' AS VARCHAR(20)) AS treemap, - CAST(pt_to_hlt.pt_concept_name AS VARCHAR(255)), - CAST(hlt_to_hlgt.hlt_concept_name AS VARCHAR(255)), - CAST(hlgt_to_soc.hlgt_concept_name AS VARCHAR(255)), - 
CAST(soc.concept_name AS VARCHAR(255)) AS soc_concept_name + snomed.concept_name AS concept_name, + 'Condition' AS treemap, + null as concept_hierarchy_type, + pt_to_hlt.pt_concept_name as level1_concept_name, + hlt_to_hlgt.hlt_concept_name as level2_concept_name, + hlgt_to_soc.hlgt_concept_name as level3_concept_name, + soc.concept_name AS level4_concept_name +into #ch_condition + FROM ( SELECT concept_id, concept_name - FROM @vocab_database_schema.concept + FROM @vocabDatabaseSchema.concept WHERE domain_id = 'Condition' ) snomed LEFT JOIN @@ -46,14 +38,14 @@ INSERT INTO @results_database_schema.concept_hierarchy c1.concept_id AS snomed_concept_id, max(c2.concept_id) AS pt_concept_id FROM - @vocab_database_schema.concept c1 + @vocabDatabaseSchema.concept c1 INNER JOIN - @vocab_database_schema.concept_ancestor ca1 + @vocabDatabaseSchema.concept_ancestor ca1 ON c1.concept_id = ca1.descendant_concept_id AND c1.domain_id = 'Condition' AND ca1.min_levels_of_separation = 1 INNER JOIN - @vocab_database_schema.concept c2 + @vocabDatabaseSchema.concept c2 ON ca1.ancestor_concept_id = c2.concept_id AND c2.vocabulary_id = 'MedDRA' GROUP BY c1.concept_id @@ -66,14 +58,14 @@ INSERT INTO @results_database_schema.concept_hierarchy c1.concept_name AS pt_concept_name, max(c2.concept_id) AS hlt_concept_id FROM - @vocab_database_schema.concept c1 + @vocabDatabaseSchema.concept c1 INNER JOIN - @vocab_database_schema.concept_ancestor ca1 + @vocabDatabaseSchema.concept_ancestor ca1 ON c1.concept_id = ca1.descendant_concept_id AND c1.vocabulary_id = 'MedDRA' AND ca1.min_levels_of_separation = 1 INNER JOIN - @vocab_database_schema.concept c2 + @vocabDatabaseSchema.concept c2 ON ca1.ancestor_concept_id = c2.concept_id AND c2.vocabulary_id = 'MedDRA' GROUP BY c1.concept_id, c1.concept_name @@ -86,14 +78,14 @@ INSERT INTO @results_database_schema.concept_hierarchy c1.concept_name AS hlt_concept_name, max(c2.concept_id) AS hlgt_concept_id FROM - @vocab_database_schema.concept c1 + @vocabDatabaseSchema.concept c1 INNER JOIN - @vocab_database_schema.concept_ancestor ca1 + @vocabDatabaseSchema.concept_ancestor ca1 ON c1.concept_id = ca1.descendant_concept_id AND c1.vocabulary_id = 'MedDRA' AND ca1.min_levels_of_separation = 1 INNER JOIN - @vocab_database_schema.concept c2 + @vocabDatabaseSchema.concept c2 ON ca1.ancestor_concept_id = c2.concept_id AND c2.vocabulary_id = 'MedDRA' GROUP BY c1.concept_id, c1.concept_name @@ -106,34 +98,35 @@ INSERT INTO @results_database_schema.concept_hierarchy c1.concept_name AS hlgt_concept_name, max(c2.concept_id) AS soc_concept_id FROM - @vocab_database_schema.concept c1 + @vocabDatabaseSchema.concept c1 INNER JOIN - @vocab_database_schema.concept_ancestor ca1 + @vocabDatabaseSchema.concept_ancestor ca1 ON c1.concept_id = ca1.descendant_concept_id AND c1.vocabulary_id = 'MedDRA' AND ca1.min_levels_of_separation = 1 INNER JOIN - @vocab_database_schema.concept c2 + @vocabDatabaseSchema.concept c2 ON ca1.ancestor_concept_id = c2.concept_id AND c2.vocabulary_id = 'MedDRA' GROUP BY c1.concept_id, c1.concept_name ) hlgt_to_soc ON hlt_to_hlgt.hlgt_concept_id = hlgt_to_soc.hlgt_concept_id - LEFT JOIN @vocab_database_schema.concept soc + LEFT JOIN @vocabDatabaseSchema.concept soc ON hlgt_to_soc.soc_concept_id = soc.concept_id; /********** DRUG **********/ -INSERT INTO @results_database_schema.concept_hierarchy -(concept_id, concept_name, treemap, level1_concept_name, level2_concept_name, level3_concept_name, level4_concept_name) + SELECT rxnorm.concept_id, - CAST(rxnorm.concept_name AS 
VARCHAR(400)) AS rxnorm_concept_name, - CAST('Drug' AS VARCHAR(20)) AS treemap, - CAST(rxnorm.rxnorm_ingredient_concept_name AS VARCHAR(255)), - CAST(atc5_to_atc3.atc5_concept_name AS VARCHAR(255)), - CAST(atc3_to_atc1.atc3_concept_name AS VARCHAR(255)), - CAST(atc1.concept_name AS VARCHAR(255)) AS atc1_concept_name + rxnorm.concept_name AS concept_name, + 'Drug' AS treemap, + null as concept_hierarchy_type, + rxnorm.rxnorm_ingredient_concept_name as level1_concept_name, + atc5_to_atc3.atc5_concept_name as level2_concept_name, + atc3_to_atc1.atc3_concept_name as level3_concept_name, + atc1.concept_name AS level4_concept_name +into #ch_drug FROM ( SELECT @@ -141,11 +134,11 @@ INSERT INTO @results_database_schema.concept_hierarchy c1.concept_name, c2.concept_id AS rxnorm_ingredient_concept_id, c2.concept_name AS RxNorm_ingredient_concept_name - FROM @vocab_database_schema.concept c1 - INNER JOIN @vocab_database_schema.concept_ancestor ca1 + FROM @vocabDatabaseSchema.concept c1 + INNER JOIN @vocabDatabaseSchema.concept_ancestor ca1 ON c1.concept_id = ca1.descendant_concept_id AND c1.domain_id = 'Drug' - INNER JOIN @vocab_database_schema.concept c2 + INNER JOIN @vocabDatabaseSchema.concept c2 ON ca1.ancestor_concept_id = c2.concept_id AND c2.domain_id = 'Drug' AND c2.concept_class_id = 'Ingredient' @@ -155,14 +148,14 @@ INSERT INTO @results_database_schema.concept_hierarchy c1.concept_id AS rxnorm_ingredient_concept_id, max(c2.concept_id) AS atc5_concept_id FROM - @vocab_database_schema.concept c1 + @vocabDatabaseSchema.concept c1 INNER JOIN - @vocab_database_schema.concept_ancestor ca1 + @vocabDatabaseSchema.concept_ancestor ca1 ON c1.concept_id = ca1.descendant_concept_id AND c1.domain_id = 'Drug' AND c1.concept_class_id = 'Ingredient' INNER JOIN - @vocab_database_schema.concept c2 + @vocabDatabaseSchema.concept c2 ON ca1.ancestor_concept_id = c2.concept_id AND c2.vocabulary_id = 'ATC' AND c2.concept_class_id = 'ATC 4th' @@ -176,14 +169,14 @@ INSERT INTO @results_database_schema.concept_hierarchy c1.concept_name AS atc5_concept_name, max(c2.concept_id) AS atc3_concept_id FROM - @vocab_database_schema.concept c1 + @vocabDatabaseSchema.concept c1 INNER JOIN - @vocab_database_schema.concept_ancestor ca1 + @vocabDatabaseSchema.concept_ancestor ca1 ON c1.concept_id = ca1.descendant_concept_id AND c1.vocabulary_id = 'ATC' AND c1.concept_class_id = 'ATC 4th' INNER JOIN - @vocab_database_schema.concept c2 + @vocabDatabaseSchema.concept c2 ON ca1.ancestor_concept_id = c2.concept_id AND c2.vocabulary_id = 'ATC' AND c2.concept_class_id = 'ATC 2nd' @@ -197,14 +190,14 @@ INSERT INTO @results_database_schema.concept_hierarchy c1.concept_name AS atc3_concept_name, max(c2.concept_id) AS atc1_concept_id FROM - @vocab_database_schema.concept c1 + @vocabDatabaseSchema.concept c1 INNER JOIN - @vocab_database_schema.concept_ancestor ca1 + @vocabDatabaseSchema.concept_ancestor ca1 ON c1.concept_id = ca1.descendant_concept_id AND c1.vocabulary_id = 'ATC' AND c1.concept_class_id = 'ATC 2nd' INNER JOIN - @vocab_database_schema.concept c2 + @vocabDatabaseSchema.concept c2 ON ca1.ancestor_concept_id = c2.concept_id AND c2.vocabulary_id = 'ATC' AND c2.concept_class_id = 'ATC 1st' @@ -212,26 +205,28 @@ INSERT INTO @results_database_schema.concept_hierarchy ) atc3_to_atc1 ON atc5_to_atc3.atc3_concept_id = atc3_to_atc1.atc3_concept_id - LEFT JOIN @vocab_database_schema.concept atc1 + LEFT JOIN @vocabDatabaseSchema.concept atc1 ON atc3_to_atc1.atc1_concept_id = atc1.concept_id; /********** DRUG_ERA **********/ -INSERT INTO 
@results_database_schema.concept_hierarchy -(concept_id, concept_name, treemap, level1_concept_name, level2_concept_name, level3_concept_name) + SELECT - rxnorm.rxnorm_ingredient_concept_id, - CAST(rxnorm.rxnorm_ingredient_concept_name AS VARCHAR(400)), - CAST('Drug Era' AS VARCHAR(20)) AS treemap, - CAST(atc5_to_atc3.atc5_concept_name AS VARCHAR(255)), - CAST(atc3_to_atc1.atc3_concept_name AS VARCHAR(255)), - CAST(atc1.concept_name AS VARCHAR(255)) AS atc1_concept_name + rxnorm.rxnorm_ingredient_concept_id as concept_id, + rxnorm.rxnorm_ingredient_concept_name as concept_name, + 'Drug Era' AS treemap, + null as concept_hierarchy_type, + atc5_to_atc3.atc5_concept_name as level1_concept_name, + atc3_to_atc1.atc3_concept_name as level2_concept_name, + atc1.concept_name as level3_concept_name, + null as level4_concept_name +into #ch_drug_era FROM ( SELECT c2.concept_id AS rxnorm_ingredient_concept_id, c2.concept_name AS RxNorm_ingredient_concept_name FROM - @vocab_database_schema.concept c2 + @vocabDatabaseSchema.concept c2 WHERE c2.domain_id = 'Drug' AND c2.concept_class_id = 'Ingredient' @@ -241,14 +236,14 @@ INSERT INTO @results_database_schema.concept_hierarchy c1.concept_id AS rxnorm_ingredient_concept_id, max(c2.concept_id) AS atc5_concept_id FROM - @vocab_database_schema.concept c1 + @vocabDatabaseSchema.concept c1 INNER JOIN - @vocab_database_schema.concept_ancestor ca1 + @vocabDatabaseSchema.concept_ancestor ca1 ON c1.concept_id = ca1.descendant_concept_id AND c1.domain_id = 'Drug' AND c1.concept_class_id = 'Ingredient' INNER JOIN - @vocab_database_schema.concept c2 + @vocabDatabaseSchema.concept c2 ON ca1.ancestor_concept_id = c2.concept_id AND c2.vocabulary_id = 'ATC' AND c2.concept_class_id = 'ATC 4th' @@ -262,14 +257,14 @@ INSERT INTO @results_database_schema.concept_hierarchy c1.concept_name AS atc5_concept_name, max(c2.concept_id) AS atc3_concept_id FROM - @vocab_database_schema.concept c1 + @vocabDatabaseSchema.concept c1 INNER JOIN - @vocab_database_schema.concept_ancestor ca1 + @vocabDatabaseSchema.concept_ancestor ca1 ON c1.concept_id = ca1.descendant_concept_id AND c1.vocabulary_id = 'ATC' AND c1.concept_class_id = 'ATC 4th' INNER JOIN - @vocab_database_schema.concept c2 + @vocabDatabaseSchema.concept c2 ON ca1.ancestor_concept_id = c2.concept_id AND c2.vocabulary_id = 'ATC' AND c2.concept_class_id = 'ATC 2nd' @@ -283,14 +278,14 @@ INSERT INTO @results_database_schema.concept_hierarchy c1.concept_name AS atc3_concept_name, max(c2.concept_id) AS atc1_concept_id FROM - @vocab_database_schema.concept c1 + @vocabDatabaseSchema.concept c1 INNER JOIN - @vocab_database_schema.concept_ancestor ca1 + @vocabDatabaseSchema.concept_ancestor ca1 ON c1.concept_id = ca1.descendant_concept_id AND c1.vocabulary_id = 'ATC' AND c1.concept_class_id = 'ATC 2nd' INNER JOIN - @vocab_database_schema.concept c2 + @vocabDatabaseSchema.concept c2 ON ca1.ancestor_concept_id = c2.concept_id AND c2.vocabulary_id = 'ATC' AND c2.concept_class_id = 'ATC 1st' @@ -298,84 +293,90 @@ INSERT INTO @results_database_schema.concept_hierarchy ) atc3_to_atc1 ON atc5_to_atc3.atc3_concept_id = atc3_to_atc1.atc3_concept_id - LEFT JOIN @vocab_database_schema.concept atc1 + LEFT JOIN @vocabDatabaseSchema.concept atc1 ON atc3_to_atc1.atc1_concept_id = atc1.concept_id; /********** MEASUREMENT **********/ -INSERT INTO @results_database_schema.concept_hierarchy -(concept_id, concept_name, treemap, level1_concept_name, level2_concept_name, level3_concept_name) + SELECT m.concept_id, - CAST(m.concept_name AS VARCHAR(400)), - 
CAST('Measurement' AS VARCHAR(20)) AS treemap, - CAST(max(c1.concept_name) AS VARCHAR(255)) AS level1_concept_name, - CAST(max(c2.concept_name) AS VARCHAR(255)) AS level2_concept_name, - CAST(max(c3.concept_name) AS VARCHAR(255)) AS level3_concept_name + m.concept_name, + 'Measurement' AS treemap, + null as concept_hierarchy_type, + max(c1.concept_name) AS level1_concept_name, + max(c2.concept_name) AS level2_concept_name, + max(c3.concept_name) AS level3_concept_name, + null as level4_concept_name +into #ch_measurement FROM ( SELECT DISTINCT concept_id, concept_name - FROM @vocab_database_schema.concept c + FROM @vocabDatabaseSchema.concept c WHERE domain_id = 'Measurement' ) m - LEFT JOIN @vocab_database_schema.concept_ancestor ca1 + LEFT JOIN @vocabDatabaseSchema.concept_ancestor ca1 ON M.concept_id = ca1.DESCENDANT_CONCEPT_ID AND ca1.min_levels_of_separation = 1 - LEFT JOIN @vocab_database_schema.concept c1 ON ca1.ANCESTOR_CONCEPT_ID = c1.concept_id - LEFT JOIN @vocab_database_schema.concept_ancestor ca2 + LEFT JOIN @vocabDatabaseSchema.concept c1 ON ca1.ANCESTOR_CONCEPT_ID = c1.concept_id + LEFT JOIN @vocabDatabaseSchema.concept_ancestor ca2 ON c1.concept_id = ca2.DESCENDANT_CONCEPT_ID AND ca2.min_levels_of_separation = 1 - LEFT JOIN @vocab_database_schema.concept c2 ON ca2.ANCESTOR_CONCEPT_ID = c2.concept_id - LEFT JOIN @vocab_database_schema.concept_ancestor ca3 + LEFT JOIN @vocabDatabaseSchema.concept c2 ON ca2.ANCESTOR_CONCEPT_ID = c2.concept_id + LEFT JOIN @vocabDatabaseSchema.concept_ancestor ca3 ON c2.concept_id = ca3.DESCENDANT_CONCEPT_ID AND ca3.min_levels_of_separation = 1 - LEFT JOIN @vocab_database_schema.concept c3 ON ca3.ANCESTOR_CONCEPT_ID = c3.concept_id + LEFT JOIN @vocabDatabaseSchema.concept c3 ON ca3.ANCESTOR_CONCEPT_ID = c3.concept_id GROUP BY M.concept_id, M.concept_name; /********** OBSERVATION **********/ -INSERT INTO @results_database_schema.concept_hierarchy -(concept_id, concept_name, treemap, level1_concept_name, level2_concept_name, level3_concept_name) + SELECT obs.concept_id, - CAST(obs.concept_name AS VARCHAR(400)), - CAST('Observation' AS VARCHAR(20)) AS treemap, - CAST(max(c1.concept_name) AS VARCHAR(255)) AS level1_concept_name, - CAST(max(c2.concept_name) AS VARCHAR(255)) AS level2_concept_name, - CAST(max(c3.concept_name) AS VARCHAR(255)) AS level3_concept_name + obs.concept_name, + 'Observation' AS treemap, + null as concept_hierarchy_type, + max(c1.concept_name) AS level1_concept_name, + max(c2.concept_name) AS level2_concept_name, + max(c3.concept_name) AS level3_concept_name, + null as level4_concept_name +into #ch_observation FROM ( SELECT concept_id, concept_name - FROM @vocab_database_schema.concept + FROM @vocabDatabaseSchema.concept WHERE domain_id = 'Observation' ) obs - LEFT JOIN @vocab_database_schema.concept_ancestor ca1 + LEFT JOIN @vocabDatabaseSchema.concept_ancestor ca1 ON obs.concept_id = ca1.DESCENDANT_CONCEPT_ID AND ca1.min_levels_of_separation = 1 - LEFT JOIN @vocab_database_schema.concept c1 ON ca1.ANCESTOR_CONCEPT_ID = c1.concept_id - LEFT JOIN @vocab_database_schema.concept_ancestor ca2 + LEFT JOIN @vocabDatabaseSchema.concept c1 ON ca1.ANCESTOR_CONCEPT_ID = c1.concept_id + LEFT JOIN @vocabDatabaseSchema.concept_ancestor ca2 ON c1.concept_id = ca2.DESCENDANT_CONCEPT_ID AND ca2.min_levels_of_separation = 1 - LEFT JOIN @vocab_database_schema.concept c2 ON ca2.ANCESTOR_CONCEPT_ID = c2.concept_id - LEFT JOIN @vocab_database_schema.concept_ancestor ca3 + LEFT JOIN @vocabDatabaseSchema.concept c2 ON ca2.ANCESTOR_CONCEPT_ID = 
c2.concept_id + LEFT JOIN @vocabDatabaseSchema.concept_ancestor ca3 ON c2.concept_id = ca3.DESCENDANT_CONCEPT_ID AND ca3.min_levels_of_separation = 1 - LEFT JOIN @vocab_database_schema.concept c3 ON ca3.ANCESTOR_CONCEPT_ID = c3.concept_id + LEFT JOIN @vocabDatabaseSchema.concept c3 ON ca3.ANCESTOR_CONCEPT_ID = c3.concept_id GROUP BY obs.concept_id, obs.concept_name; /********** PROCEDURE **********/ -INSERT INTO @results_database_schema.concept_hierarchy -(concept_id, concept_name, treemap, level1_concept_name, level2_concept_name, level3_concept_name) + SELECT procs.concept_id, - CAST(procs.proc_concept_name AS VARCHAR(400)), - CAST('Procedure' AS VARCHAR(20)) AS treemap, - CAST(max(proc_hierarchy.os3_concept_name) AS VARCHAR(255)) AS level2_concept_name, - CAST(max(proc_hierarchy.os2_concept_name) AS VARCHAR(255)) AS level3_concept_name, - CAST(max(proc_hierarchy.os1_concept_name) AS VARCHAR(255)) AS level4_concept_name + procs.proc_concept_name as concept_name, + 'Procedure' AS treemap, + null as concept_hierarchy_type, + max(proc_hierarchy.os3_concept_name) AS level1_concept_name, + max(proc_hierarchy.os2_concept_name) AS level2_concept_name, + max(proc_hierarchy.os1_concept_name) AS level3_concept_name, + null as level4_concept_name +into #ch_procedure FROM ( SELECT c1.concept_id, - CONCAT(v1.vocabulary_name, ' ', c1.concept_code, ': ', c1.concept_name) AS proc_concept_name - FROM @vocab_database_schema.concept c1 - INNER JOIN @vocab_database_schema.vocabulary v1 + v1.vocabulary_name + ' ' + c1.concept_code + ': ' + c1.concept_name AS proc_concept_name + FROM @vocabDatabaseSchema.concept c1 + INNER JOIN @vocabDatabaseSchema.vocabulary v1 ON c1.vocabulary_id = v1.vocabulary_id WHERE c1.domain_id = 'Procedure' ) procs @@ -383,18 +384,18 @@ INSERT INTO @results_database_schema.concept_hierarchy (SELECT ca0.DESCENDANT_CONCEPT_ID, max(ca0.ancestor_concept_id) AS ancestor_concept_id - FROM @vocab_database_schema.concept_ancestor ca0 + FROM @vocabDatabaseSchema.concept_ancestor ca0 INNER JOIN (SELECT DISTINCT c2.concept_id AS os3_concept_id - FROM @vocab_database_schema.concept_ancestor ca1 + FROM @vocabDatabaseSchema.concept_ancestor ca1 INNER JOIN - @vocab_database_schema.concept c1 + @vocabDatabaseSchema.concept c1 ON ca1.DESCENDANT_CONCEPT_ID = c1.concept_id INNER JOIN - @vocab_database_schema.concept_ancestor ca2 + @vocabDatabaseSchema.concept_ancestor ca2 ON c1.concept_id = ca2.ANCESTOR_CONCEPT_ID INNER JOIN - @vocab_database_schema.concept c2 + @vocabDatabaseSchema.concept c2 ON ca2.DESCENDANT_CONCEPT_ID = c2.concept_id WHERE ca1.ancestor_concept_id = 4040390 AND ca1.Min_LEVELS_OF_SEPARATION = 2 @@ -415,9 +416,9 @@ INSERT INTO @results_database_schema.concept_hierarchy (SELECT DESCENDANT_CONCEPT_ID AS os1_concept_id, concept_name AS os1_concept_name - FROM @vocab_database_schema.concept_ancestor ca1 + FROM @vocabDatabaseSchema.concept_ancestor ca1 INNER JOIN - @vocab_database_schema.concept c1 + @vocabDatabaseSchema.concept c1 ON ca1.DESCENDANT_CONCEPT_ID = c1.concept_id WHERE ancestor_concept_id = 4040390 AND Min_LEVELS_OF_SEPARATION = 1 @@ -428,15 +429,15 @@ INSERT INTO @results_database_schema.concept_hierarchy max(c1.CONCEPT_ID) AS os1_concept_id, c2.concept_id AS os2_concept_id, c2.concept_name AS os2_concept_name - FROM @vocab_database_schema.concept_ancestor ca1 + FROM @vocabDatabaseSchema.concept_ancestor ca1 INNER JOIN - @vocab_database_schema.concept c1 + @vocabDatabaseSchema.concept c1 ON ca1.DESCENDANT_CONCEPT_ID = c1.concept_id INNER JOIN - 
@vocab_database_schema.concept_ancestor ca2 + @vocabDatabaseSchema.concept_ancestor ca2 ON c1.concept_id = ca2.ANCESTOR_CONCEPT_ID INNER JOIN - @vocab_database_schema.concept c2 + @vocabDatabaseSchema.concept c2 ON ca2.DESCENDANT_CONCEPT_ID = c2.concept_id WHERE ca1.ancestor_concept_id = 4040390 AND ca1.Min_LEVELS_OF_SEPARATION = 1 @@ -450,15 +451,15 @@ INSERT INTO @results_database_schema.concept_hierarchy max(c1.CONCEPT_ID) AS os2_concept_id, c2.concept_id AS os3_concept_id, c2.concept_name AS os3_concept_name - FROM @vocab_database_schema.concept_ancestor ca1 + FROM @vocabDatabaseSchema.concept_ancestor ca1 INNER JOIN - @vocab_database_schema.concept c1 + @vocabDatabaseSchema.concept c1 ON ca1.DESCENDANT_CONCEPT_ID = c1.concept_id INNER JOIN - @vocab_database_schema.concept_ancestor ca2 + @vocabDatabaseSchema.concept_ancestor ca2 ON c1.concept_id = ca2.ANCESTOR_CONCEPT_ID INNER JOIN - @vocab_database_schema.concept c2 + @vocabDatabaseSchema.concept c2 ON ca2.DESCENDANT_CONCEPT_ID = c2.concept_id WHERE ca1.ancestor_concept_id = 4040390 AND ca1.Min_LEVELS_OF_SEPARATION = 2 @@ -470,3 +471,103 @@ INSERT INTO @results_database_schema.concept_hierarchy ON ca1.ancestor_concept_id = proc_hierarchy.os3_concept_id GROUP BY procs.concept_id, procs.proc_concept_name; + +select * into +@resultsDatabaseSchema.concept_hierarchy +from +( + select + concept_id, + cast(concept_name as VARCHAR(400)) as concept_name, + cast(treemap as VARCHAR(20)) as treemap, + cast(concept_hierarchy_type as VARCHAR(20)) as concept_hierarchy_type, + cast(level1_concept_name as VARCHAR(255)) as level1_concept_name, + cast(level2_concept_name as VARCHAR(255)) as level2_concept_name, + cast(level3_concept_name as VARCHAR(255)) as level3_concept_name, + cast(level4_concept_name as VARCHAR(255)) as level4_concept_name + from #ch_condition + + union all + + select + concept_id, + cast(concept_name as VARCHAR(400)) as concept_name, + cast(treemap as VARCHAR(20)) as treemap, + cast(concept_hierarchy_type as VARCHAR(20)) as concept_hierarchy_type, + cast(level1_concept_name as VARCHAR(255)) as level1_concept_name, + cast(level2_concept_name as VARCHAR(255)) as level2_concept_name, + cast(level3_concept_name as VARCHAR(255)) as level3_concept_name, + cast(level4_concept_name as VARCHAR(255)) as level4_concept_name + from #ch_drug + + union all + + select + concept_id, + cast(concept_name as VARCHAR(400)) as concept_name, + cast(treemap as VARCHAR(20)) as treemap, + cast(concept_hierarchy_type as VARCHAR(20)) as concept_hierarchy_type, + cast(level1_concept_name as VARCHAR(255)) as level1_concept_name, + cast(level2_concept_name as VARCHAR(255)) as level2_concept_name, + cast(level3_concept_name as VARCHAR(255)) as level3_concept_name, + cast(level4_concept_name as VARCHAR(255)) as level4_concept_name + from #ch_drug_era + + union all + + select + concept_id, + cast(concept_name as VARCHAR(400)) as concept_name, + cast(treemap as VARCHAR(20)) as treemap, + cast(concept_hierarchy_type as VARCHAR(20)) as concept_hierarchy_type, + cast(level1_concept_name as VARCHAR(255)) as level1_concept_name, + cast(level2_concept_name as VARCHAR(255)) as level2_concept_name, + cast(level3_concept_name as VARCHAR(255)) as level3_concept_name, + cast(level4_concept_name as VARCHAR(255)) as level4_concept_name + from #ch_measurement + + union all + + select + concept_id, + cast(concept_name as VARCHAR(400)) as concept_name, + cast(treemap as VARCHAR(20)) as treemap, + cast(concept_hierarchy_type as VARCHAR(20)) as concept_hierarchy_type, + 
cast(level1_concept_name as VARCHAR(255)) as level1_concept_name, + cast(level2_concept_name as VARCHAR(255)) as level2_concept_name, + cast(level3_concept_name as VARCHAR(255)) as level3_concept_name, + cast(level4_concept_name as VARCHAR(255)) as level4_concept_name + from #ch_observation + + union all + + select + concept_id, + cast(concept_name as VARCHAR(400)) as concept_name, + cast(treemap as VARCHAR(20)) as treemap, + cast(concept_hierarchy_type as VARCHAR(20)) as concept_hierarchy_type, + cast(level1_concept_name as VARCHAR(255)) as level1_concept_name, + cast(level2_concept_name as VARCHAR(255)) as level2_concept_name, + cast(level3_concept_name as VARCHAR(255)) as level3_concept_name, + cast(level4_concept_name as VARCHAR(255)) as level4_concept_name + from #ch_procedure +) Q +; + +truncate table #ch_condition; +drop table #ch_condition; + +truncate table #ch_drug; +drop table #ch_drug; + +truncate table #ch_drug_era; +drop table #ch_drug_era; + +truncate table #ch_measurement; +drop table #ch_measurement; + +truncate table #ch_observation; +drop table #ch_observation; + +truncate table #ch_procedure; +drop table #ch_procedure; diff --git a/inst/sql/sql_server/post_processing/merge_concept_hierarchy.sql b/inst/sql/sql_server/post_processing/merge_concept_hierarchy.sql new file mode 100755 index 00000000..5b6b167c --- /dev/null +++ b/inst/sql/sql_server/post_processing/merge_concept_hierarchy.sql @@ -0,0 +1,96 @@ +/*********************************************************************/ +/***** Create hierarchy lookup table for the treemap hierarchies *****/ +/*********************************************************************/ +IF OBJECT_ID('@resultsDatabaseSchema.concept_hierarchy', 'U') IS NOT NULL + DROP TABLE @resultsDatabaseSchema.concept_hierarchy; + + +--HINT DISTRIBUTE_ON_KEY(concept_id) +select * into +@resultsDatabaseSchema.concept_hierarchy +from +( + select + concept_id, + cast(concept_name as VARCHAR(400)) as concept_name, + cast(treemap as VARCHAR(20)) as treemap, + cast(concept_hierarchy_type as VARCHAR(20)) as concept_hierarchy_type, + cast(level1_concept_name as VARCHAR(255)) as level1_concept_name, + cast(level2_concept_name as VARCHAR(255)) as level2_concept_name, + cast(level3_concept_name as VARCHAR(255)) as level3_concept_name, + cast(level4_concept_name as VARCHAR(255)) as level4_concept_name + from @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_ch_condition + + union all + + select + concept_id, + cast(concept_name as VARCHAR(400)) as concept_name, + cast(treemap as VARCHAR(20)) as treemap, + cast(concept_hierarchy_type as VARCHAR(20)) as concept_hierarchy_type, + cast(level1_concept_name as VARCHAR(255)) as level1_concept_name, + cast(level2_concept_name as VARCHAR(255)) as level2_concept_name, + cast(level3_concept_name as VARCHAR(255)) as level3_concept_name, + cast(level4_concept_name as VARCHAR(255)) as level4_concept_name + from @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_ch_drug + + union all + + select + concept_id, + cast(concept_name as VARCHAR(400)) as concept_name, + cast(treemap as VARCHAR(20)) as treemap, + cast(concept_hierarchy_type as VARCHAR(20)) as concept_hierarchy_type, + cast(level1_concept_name as VARCHAR(255)) as level1_concept_name, + cast(level2_concept_name as VARCHAR(255)) as level2_concept_name, + cast(level3_concept_name as VARCHAR(255)) as level3_concept_name, + cast(level4_concept_name as VARCHAR(255)) as level4_concept_name + from @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_ch_drug_era + + union 
all + + select + concept_id, + cast(concept_name as VARCHAR(400)) as concept_name, + cast(treemap as VARCHAR(20)) as treemap, + cast(concept_hierarchy_type as VARCHAR(20)) as concept_hierarchy_type, + cast(level1_concept_name as VARCHAR(255)) as level1_concept_name, + cast(level2_concept_name as VARCHAR(255)) as level2_concept_name, + cast(level3_concept_name as VARCHAR(255)) as level3_concept_name, + cast(level4_concept_name as VARCHAR(255)) as level4_concept_name + from @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_ch_measurement + + union all + + select + concept_id, + cast(concept_name as VARCHAR(400)) as concept_name, + cast(treemap as VARCHAR(20)) as treemap, + cast(concept_hierarchy_type as VARCHAR(20)) as concept_hierarchy_type, + cast(level1_concept_name as VARCHAR(255)) as level1_concept_name, + cast(level2_concept_name as VARCHAR(255)) as level2_concept_name, + cast(level3_concept_name as VARCHAR(255)) as level3_concept_name, + cast(level4_concept_name as VARCHAR(255)) as level4_concept_name + from @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_ch_observation + + union all + + select + concept_id, + cast(concept_name as VARCHAR(400)) as concept_name, + cast(treemap as VARCHAR(20)) as treemap, + cast(concept_hierarchy_type as VARCHAR(20)) as concept_hierarchy_type, + cast(level1_concept_name as VARCHAR(255)) as level1_concept_name, + cast(level2_concept_name as VARCHAR(255)) as level2_concept_name, + cast(level3_concept_name as VARCHAR(255)) as level3_concept_name, + cast(level4_concept_name as VARCHAR(255)) as level4_concept_name + from @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_ch_procedure +) Q +; + +drop table @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_ch_condition; +drop table @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_ch_drug; +drop table @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_ch_drug_era; +drop table @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_ch_measurement; +drop table @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_ch_observation; +drop table @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_ch_procedure; diff --git a/inst/sql/sql_server/validate_schema.sql b/inst/sql/sql_server/validate_schema.sql new file mode 100755 index 00000000..63a51357 --- /dev/null +++ b/inst/sql/sql_server/validate_schema.sql @@ -0,0 +1,555 @@ + +with cte_care_site +as +( +select cast('care_site' as varchar(50)) as tablename +from ( + SELECT top 1 + care_site_id, + location_id, + place_of_service_concept_id, + care_site_source_value, + place_of_service_source_value + FROM + @cdmDatabaseSchema.care_site +) CARE_SITE +), +cte_cdm_source +as +( + select cast('cdm_source' as varchar(50)) as tablename + from ( + select top 1 + cdm_source_name, + cdm_source_abbreviation, + cdm_holder + source_description, + source_documentation_reference, + cdm_etl_reference, + source_release_date, + cdm_release_date, + cdm_version, + vocabulary_version + from + @cdmDatabaseSchema.cdm_source +) cdm_source +), +cte_cohort +as +( + select cast('cohort' as varchar(50)) as tablename + from ( + SELECT top 1 + cohort_definition_id, + subject_id, + cohort_start_date, + cohort_end_date + FROM + @resultsDatabaseSchema.cohort +) cohort +), +cte_condition_era +as +( +select cast('condition_era' as varchar(50)) as tablename +from ( + SELECT top 1 + condition_era_id, + person_id, + condition_concept_id, + condition_era_start_date, + condition_era_end_date, + condition_occurrence_count + FROM + @cdmDatabaseSchema.condition_era +) CONDITION_ERA +), 
+cte_condition_occurrence +as +( +select cast('condition_occurrence' as varchar(50)) as tablename +from ( + SELECT top 1 + condition_occurrence_id, + person_id, + condition_concept_id, + condition_start_date, + condition_end_date, + condition_type_concept_id, + provider_id, + visit_occurrence_id, + condition_source_value, + condition_source_concept_id + FROM + @cdmDatabaseSchema.condition_occurrence +) condition_occurrence +), +cte_death +as +( +select cast('death' as varchar(50)) as tablename +from ( + SELECT top 1 + person_id, + death_date, + death_type_concept_id, + cause_concept_id, + cause_source_value, + cause_source_concept_id + FROM + @cdmDatabaseSchema.death +) death +), +cte_device_exposure +as +( +select cast('device_exposure' as varchar(50)) as tablename +from ( + SELECT top 1 + device_exposure_id, + person_id, + device_concept_id, + device_exposure_start_date, + device_exposure_end_date, + device_type_concept_id, + unique_device_id, + quantity, + provider_id, + visit_occurrence_id, + device_source_value, + device_source_concept_id + FROM + @cdmDatabaseSchema.device_exposure +) device_exposure +), +cte_dose_era +as +( +select cast('dose_era' as varchar(50)) as tablename +from ( + SELECT top 1 + dose_era_id, + person_id, + drug_concept_id, + unit_concept_id, + dose_value, + dose_era_start_date, + dose_era_end_date + FROM + @cdmDatabaseSchema.dose_era +) dose_era +), +cte_drug_era +as +( +select cast('drug_era' as varchar(50)) as tablename +from ( + SELECT top 1 + drug_era_id, + person_id, + drug_concept_id, + drug_era_start_date, + drug_era_end_date, + drug_exposure_count + FROM + @cdmDatabaseSchema.drug_era +) drug_era +), +cte_drug_exposure +as +( +select cast('drug_exposure' as varchar(50)) as tablename +from ( + SELECT top 1 + {@cdmVersion=='5'}?{ + drug_exposure_id, + person_id, + drug_concept_id, + drug_exposure_start_date, + drug_exposure_end_date, + drug_type_concept_id, + stop_reason, + refills, + quantity, + days_supply, + sig, + route_concept_id, + effective_drug_dose, + dose_unit_concept_id, + lot_number, + provider_id, + visit_occurrence_id, + drug_source_value, + drug_source_concept_id, + route_source_value, + dose_unit_source_value + }:{ + drug_exposure_id, + person_id, + drug_concept_id, + drug_exposure_start_date, + drug_exposure_start_datetime, + drug_exposure_end_date, + drug_exposure_end_datetime, + verbatim_end_date, + drug_type_concept_id, + stop_reason, + refills, + quantity, + days_supply, + sig, + route_concept_id, + lot_number, + provider_id, + visit_occurrence_id, + {@cdmVersion == '5.3'}?{ + visit_detail_id, + } + drug_source_value, + drug_source_concept_id, + route_source_value, + dose_unit_source_value + } + FROM + @cdmDatabaseSchema.drug_exposure +) drug_exposure +), +cte_location +as +( +select cast('location' as varchar(50)) as tablename +from ( + SELECT top 1 + location_id, + address_1, + address_2, + city, + STATE, + zip, + county, + location_source_value + FROM + @cdmDatabaseSchema.location +) location +), +{@cdmVersion == '5.3'}?{ + cte_metadata + as + ( + select cast('metadata' as varchar(50)) as tablename + from ( + select top 1 + metadata_concept_id, + metadata_type_concept_id, + name, + value_as_string, + value_as_concept_id, + metadata_date, + metadata_datetime + FROM + @cdmDatabaseSchema.metadata + ) metadata + ), +} +cte_observation +as +( +select cast('observation' as varchar(50)) as tablename +from ( + SELECT top 1 + observation_id, + person_id, + observation_concept_id, + observation_date, + {@cdmVersion == '5.3'}?{ + 
observation_time, + } + value_as_number, + value_as_string, + value_as_concept_id, + qualifier_concept_id, + unit_concept_id, + observation_type_concept_id, + provider_id, + visit_occurrence_id, + observation_source_value, + observation_source_concept_id, + unit_source_value, + qualifier_source_value + FROM + @cdmDatabaseSchema.observation +) observation +), +cte_observation_period +as +( +select cast('observation_period' as varchar(50)) as tablename +from ( + SELECT top 1 + observation_period_id, + person_id, + observation_period_start_date, + observation_period_end_date + FROM + @cdmDatabaseSchema.observation_period +) observation_period +), +cte_payer_plan_period +as +( +select cast('payer_plan_period' as varchar(50)) as tablename +from ( + SELECT top 1 + payer_plan_period_id, + person_id, + payer_plan_period_start_date, + payer_plan_period_end_date, + payer_source_value, + plan_source_value, + family_source_value + FROM + @cdmDatabaseSchema.payer_plan_period +) payer_plan_period +), +cte_person +as +( +select cast('person' as varchar(50)) as tablename +from ( + SELECT top 1 + person_id, + gender_concept_id, + year_of_birth, + month_of_birth, + day_of_birth, + race_concept_id, + ethnicity_concept_id, + location_id, + provider_id, + care_site_id, + person_source_value, + gender_source_value, + race_source_value, + ethnicity_source_value + FROM + @cdmDatabaseSchema.person +) person +), +cte_procedure_occurrence +as +( +select cast('procedure_occurrence' as varchar(50)) as tablename +from ( + SELECT top 1 + procedure_occurrence_id, + person_id, + procedure_concept_id, + procedure_date, + procedure_type_concept_id, + modifier_concept_id, + quantity, + provider_id, + visit_occurrence_id, + procedure_source_value, + procedure_source_concept_id, + qualifier_source_value + FROM + @cdmDatabaseSchema.procedure_occurrence +) procedure_occurrence +), +cte_provider +as +( +select cast('provider' as varchar(50)) as tablename +from ( + SELECT top 1 + provider_id, + NPI, + DEA, + specialty_concept_id, + care_site_id, + provider_source_value, + specialty_source_value + FROM + @cdmDatabaseSchema.provider +) provider +), +cte_visit_occurrence +as +( +select cast('visit_occurrence' as varchar(50)) as tablename +from ( + SELECT top 1 + visit_occurrence_id, + person_id, + visit_start_date, + visit_end_date, + visit_type_concept_id, + provider_id, + care_site_id, + visit_source_value, + visit_source_concept_id + FROM + @cdmDatabaseSchema.visit_occurrence +) visit_occurrence +), +{@runCostAnalysis}?{ + {@cdmVersion == '5'}?{ + cte_drug_cost + as + ( + select cast('drug_cost' as varchar(50)) as tablename + from ( + SELECT top 1 + drug_cost_id, + drug_exposure_id, + paid_copay, + paid_coinsurance, + paid_toward_deductible, + paid_by_payer, + paid_by_coordination_benefits, + total_out_of_pocket, + total_paid, + ingredient_cost, + dispensing_fee, + average_wholesale_price, + payer_plan_period_id + FROM + @cdmDatabaseSchema.drug_cost + ) drug_cost + ), + cte_device_cost + as + ( + select cast('device_cost' as varchar(50)) as tablename + from ( + select top 1 + device_cost_id, + device_exposure_id, + currency_concept_id, + paid_copay, + paid_coinsurance, + paid_toward_deductible, + paid_by_payer, + paid_by_coordination_benefits, + total_out_of_pocket, + total_paid, + payer_plan_period_id + FROM + @cdmDatabaseSchema.device_cost + ) drug_cost + ), + cte_procedure_cost + as + ( + select cast('procedure_cost' as varchar(50)) as tablename + from ( + SELECT top 1 + procedure_cost_id, + procedure_occurrence_id, + 
currency_concept_id, + paid_copay, + paid_coinsurance, + paid_toward_deductible, + paid_by_payer, + paid_by_coordination_benefits, + total_out_of_pocket, + total_paid, + revenue_code_concept_id, + payer_plan_period_id, + revenue_code_source_value + FROM + @cdmDatabaseSchema.procedure_cost + ) procedure_cost + ), + }:{ + cte_cost + as + ( + select cast('cost' as varchar(50)) as tablename + from ( + select top 1 + cost_id, + cost_event_id, + cost_domain_id, + cost_type_concept_id, + currency_concept_id, + total_charge, + total_cost, + total_paid, + paid_by_payer, + paid_by_patient, + paid_patient_copay, + paid_patient_coinsurance, + paid_patient_deductible, + paid_by_primary, + paid_ingredient_cost, + paid_dispensing_fee, + payer_plan_period_id, + amount_allowed, + revenue_code_concept_id, + revenue_code_source_value + FROM + @cdmDatabaseSchema.cost + ) cost + ), + } +} +cte_all +as +( + {@cdmVersion == '5.3'}?{ + select tablename from cte_metadata + union all + } + select tablename from cte_care_site + union all + select tablename from cte_cdm_source + union all + select tablename from cte_condition_era + union all + select tablename from cte_condition_occurrence + union all + select tablename from cte_cohort + union all + select tablename from cte_death + union all + select tablename from cte_device_exposure + union all + select tablename from cte_dose_era + union all + select tablename from cte_drug_era + union all + select tablename from cte_drug_exposure + union all + select tablename from cte_location + union all + select tablename from cte_observation + union all + select tablename from cte_observation_period + union all + select tablename from cte_payer_plan_period + union all + select tablename from cte_person + union all + select tablename from cte_procedure_occurrence + union all + select tablename from cte_provider + union all + select tablename from cte_visit_occurrence + {@runCostAnalysis}?{ + {@cdmVersion == '5'}?{ + union all + select tablename from cte_drug_cost + union all + select tablename from cte_device_cost + union all + select tablename from cte_procedure_cost + }:{ + union all + select tablename from cte_cost + } + } +) +select tablename +from cte_all; diff --git a/man/Achilles.Rd b/man/Achilles.Rd new file mode 100755 index 00000000..b11b70b8 --- /dev/null +++ b/man/Achilles.Rd @@ -0,0 +1,10 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Achilles-package.R +\docType{package} +\name{Achilles} +\alias{Achilles} +\alias{Achilles-package} +\title{Achilles} +\description{ +Achilles +} diff --git a/man/achilles.Rd b/man/achilles.Rd old mode 100644 new mode 100755 index 0a810356..a6ba0be9 --- a/man/achilles.Rd +++ b/man/achilles.Rd @@ -2,49 +2,69 @@ % Please edit documentation in R/Achilles.R \name{achilles} \alias{achilles} -\title{The main Achilles analysis} +\title{The main Achilles analyses (for v5.x)} \usage{ achilles(connectionDetails, cdmDatabaseSchema, oracleTempSchema = cdmDatabaseSchema, - resultsDatabaseSchema = cdmDatabaseSchema, sourceName = "", analysisIds, - createTable = TRUE, smallcellcount = 5, cdmVersion = "4", - runHeel = TRUE, validateSchema = FALSE, - vocabDatabaseSchema = cdmDatabaseSchema, runCostAnalysis = FALSE, - sqlOnly = FALSE, conceptHierarchy = TRUE, createIndices = TRUE) + resultsDatabaseSchema = cdmDatabaseSchema, + scratchDatabaseSchema = resultsDatabaseSchema, + vocabDatabaseSchema = cdmDatabaseSchema, sourceName = "", analysisIds, + createTable = TRUE, smallCellCount = 5, cdmVersion = "5", + runHeel = 
TRUE, validateSchema = FALSE, runCostAnalysis = FALSE, + conceptHierarchy = TRUE, createIndices = TRUE, numThreads = 1, + tempAchillesPrefix = "tmpach", dropScratchTables = TRUE, + sqlOnly = FALSE, outputFolder = "output", + logMultiThreadPerformance = FALSE) } \arguments{ -\item{connectionDetails}{An R object of type ConnectionDetail (details for the function that contains server info, database type, optionally username/password, port)} +\item{connectionDetails}{An R object of type \code{connectionDetails} created using the function \code{createConnectionDetails} in the \code{DatabaseConnector} package.} -\item{cdmDatabaseSchema}{string name of database schema that contains OMOP CDM. On SQL Server, this should specifiy both the database and the schema, so for example 'cdm_instance.dbo'.} +\item{cdmDatabaseSchema}{Fully qualified name of database schema that contains the OMOP CDM. +On SQL Server, this should specify both the database and the schema, so for example 'cdm_instance.dbo'.} -\item{oracleTempSchema}{For Oracle only: the name of the database schema where you want all temporary tables to be managed. Requires create/insert permissions to this database.} +\item{resultsDatabaseSchema}{Fully qualified name of database schema that we can write final results to. Default is cdmDatabaseSchema. +On SQL Server, this should specify both the database and the schema, so for example 'cdm_results.dbo'.} -\item{resultsDatabaseSchema}{string name of database schema that we can write results to. Default is cdmDatabaseSchema. On SQL Server, this should specifiy both the database and the schema, so for example 'results.dbo'.} +\item{scratchDatabaseSchema}{Fully qualified name of the database schema that will store all of the intermediate scratch tables, so for example, on SQL Server, 'cdm_scratch.dbo'. +Must be accessible to/from the cdmDatabaseSchema and the resultsDatabaseSchema. Default is resultsDatabaseSchema. +Making this "#" will run Achilles in single-threaded mode and use temporary tables instead of permanent tables.} -\item{sourceName}{string name of the database, as recorded in results} +\item{vocabDatabaseSchema}{String name of database schema that contains OMOP Vocabulary. Default is cdmDatabaseSchema. On SQL Server, this should specify both the database and the schema, so for example 'results.dbo'.} -\item{analysisIds}{(optional) a vector containing the set of Achilles analysisIds for which results will be generated. +\item{sourceName}{String name of the data source. If blank, the CDM_SOURCE table will be queried to try to obtain this.} + +\item{analysisIds}{(OPTIONAL) A vector containing the set of Achilles analysisIds for which results will be generated. If not specified, all analyses will be executed. Use \code{\link{getAnalysisDetails}} to get a list of all Achilles analyses and their Ids.} -\item{createTable}{If true, new results tables will be created in the results schema. If not, the tables are assumed to already exists, and analysis results will be added} +\item{createTable}{If true, new results tables will be created in the results schema. If not, the tables are assumed to already exist, and analysis results will be inserted (slower on MPP).} -\item{smallcellcount}{To avoid patient identifiability, cells with small counts (<= smallcellcount) are deleted.} +\item{smallCellCount}{To avoid patient identifiability, cells with small counts (<= smallCellCount) are deleted. 
Set to NULL if you don't want any deletions.} -\item{cdmVersion}{Define the OMOP CDM version used: currently support "4" and "5". Default = "4"} +\item{cdmVersion}{Define the OMOP CDM version used: currently supports v5 and above. Use major release number or minor number only (e.g. 5, 5.3)} \item{runHeel}{Boolean to determine if Achilles Heel data quality reporting will be produced based on the summary statistics. Default = TRUE} -\item{validateSchema}{Boolean to determine if CDM Schema Validation should be run. This could be very slow. Default = FALSE} +\item{validateSchema}{Boolean to determine if CDM Schema Validation should be run. Default = FALSE} + +\item{runCostAnalysis}{Boolean to determine if cost analysis should be run. Note: only works on v5.1+ style cost tables.} -\item{vocabDatabaseSchema}{string name of database schema that contains OMOP Vocabulary. Default is cdmDatabaseSchema. On SQL Server, this should specifiy both the database and the schema, so for example 'results.dbo'.} +\item{conceptHierarchy}{Boolean to determine if the concept_hierarchy result table should be created, for use by Atlas treemaps. +Please note: this table creation only requires the Vocabulary, not the CDM itself. +You could run this once for 1 Vocab version, and then copy the table to all CDMs using that Vocab.} -\item{runCostAnalysis}{Boolean to determine if cost analysis should be run. Note: only works on CDM v5.0 style cost tables.} +\item{createIndices}{Boolean to determine if indices should be created on the resulting Achilles and concept_hierarchy table. Default= TRUE} -\item{sqlOnly}{Boolean to determine if the SQL generated by this function call should be executed or simply returned as a string. DEFAULT = FALSE indicating that the SQL code should be executed.} +\item{numThreads}{(OPTIONAL, multi-threaded mode) The number of threads to use to run Achilles in parallel. Default is 1 thread.} -\item{conceptHierarchy}{Boolean to determine if the concept_hierarchy result table should be created, for use by Atlas treemaps. Note: only works on CDM v5.0 tables.} +\item{tempAchillesPrefix}{(OPTIONAL, multi-threaded mode) The prefix to use for the scratch Achilles analyses tables. Default is "tmpach"} -\item{createIndices}{Boolean to determine if indices should be created on the resulting Achilles and concept_hierarchy table. Default= TRUE} +\item{dropScratchTables}{(OPTIONAL, multi-threaded mode) TRUE = drop the scratch tables (may take time depending on dbms), FALSE = leave them in place for later removal.} + +\item{sqlOnly}{Boolean to determine if Achilles should be fully executed. TRUE = just generate SQL files, don't actually run, FALSE = run Achilles} + +\item{outputFolder}{(OPTIONAL, SQL-only mode) Path to store SQL files} + +\item{logMultiThreadPerformance}{(OPTIONAL, multi-threaded mode) Should an RDS file of execution times for every analysis query be created in the outputFolder?} } \value{ An object of type \code{achillesResults} containing details for connecting to the database containing the results @@ -56,9 +76,15 @@ An object of type \code{achillesResults} containing details for connecting to th \code{achilles} creates descriptive statistics summary for an entire OMOP CDM instance. 
} \examples{ -\dontrun{ - connectionDetails <- DatabaseConnector::createConnectionDetails(dbms="sql server", server="myserver") - achillesResults <- achilles(connectionDetails, cdmDatabaseSchema="cdm4_sim", resultsDatabaseSchema="scratch", sourceName="TestDB", validateSchema="TRUE", vocabDatabaseSchema="vocabulary") - fetchAchillesAnalysisResults(connectionDetails, "scratch", 106) -} + \dontrun{ + connectionDetails <- createConnectionDetails(dbms="sql server", server="some_server") + achillesResults <- achilles(connectionDetails = connectionDetails, + cdmDatabaseSchema = "cdm", + resultsDatabaseSchema="results", + scratchDatabaseSchema="scratch", + sourceName="Some Source", + cdmVersion = "5.3", + runCostAnalysis = TRUE, + numThreads = 10) + } } diff --git a/man/achillesHeel.Rd b/man/achillesHeel.Rd old mode 100644 new mode 100755 index bb27db0f..f4dba926 --- a/man/achillesHeel.Rd +++ b/man/achillesHeel.Rd @@ -1,26 +1,44 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/Achilles.R +% Please edit documentation in R/AchillesHeel.R \name{achillesHeel} \alias{achillesHeel} -\title{execution of data quality rules} +\title{Execution of data quality rules (for v5 and above)} \usage{ achillesHeel(connectionDetails, cdmDatabaseSchema, - oracleTempSchema = cdmDatabaseSchema, - resultsDatabaseSchema = cdmDatabaseSchema, cdmVersion = "5", - vocabDatabaseSchema = cdmDatabaseSchema) + resultsDatabaseSchema = cdmDatabaseSchema, + scratchDatabaseSchema = resultsDatabaseSchema, cdmVersion = "5", + numThreads = 1, tempHeelPrefix = "tmpheel", dropScratchTables = FALSE, + ThresholdAgeWarning = 125, ThresholdOutpatientVisitPerc = 0.43, + ThresholdMinimalPtMeasDxRx = 20.5, outputFolder = "output", + sqlOnly = FALSE) } \arguments{ -\item{connectionDetails}{An R object of type ConnectionDetail (details for the function that contains server info, database type, optionally username/password, port)} +\item{connectionDetails}{An R object of type \code{connectionDetails} created using the function \code{createConnectionDetails} in the \code{DatabaseConnector} package.} \item{cdmDatabaseSchema}{string name of database schema that contains OMOP CDM. On SQL Server, this should specifiy both the database and the schema, so for example 'cdm_instance.dbo'.} -\item{oracleTempSchema}{For Oracle only: the name of the database schema where you want all temporary tables to be managed. Requires create/insert permissions to this database.} +\item{resultsDatabaseSchema}{string name of database schema that we can write final results to. Default is cdmDatabaseSchema. On SQL Server, this should specifiy both the database and the schema, +so for example 'results.dbo'.} -\item{resultsDatabaseSchema}{string name of database schema that we can write results to. Default is cdmDatabaseSchema. On SQL Server, this should specifiy both the database and the schema, so for example 'results.dbo'.} +\item{scratchDatabaseSchema}{(OPTIONAL, multi-threaded mode) Name of a fully qualified schema that is accessible to/from the resultsDatabaseSchema, that can store all of the scratch tables. Default is resultsDatabaseSchema.} -\item{cdmVersion}{Define the OMOP CDM version used: currently support "4" and "5". Default = "4"} +\item{cdmVersion}{Define the OMOP CDM version used: currently supports v5 and above. Default = "5".} -\item{vocabDatabaseSchema}{string name of database schema that contains OMOP Vocabulary. Default is cdmDatabaseSchema. 
On SQL Server, this should specifiy both the database and the schema, so for example 'results.dbo'.} +\item{numThreads}{(OPTIONAL, multi-threaded mode) The number of threads to use to run Achilles in parallel. Default is 1 thread.} + +\item{tempHeelPrefix}{(OPTIONAL, multi-threaded mode) The prefix to use for the "temporary" (but actually permanent) Heel tables. Default is "tmpheel"} + +\item{dropScratchTables}{(OPTIONAL, multi-threaded mode) TRUE = drop the scratch tables (may take time depending on dbms), FALSE = leave them in place} + +\item{ThresholdAgeWarning}{The maximum age to allow in Heel} + +\item{ThresholdOutpatientVisitPerc}{The maximum percentage of outpatient visits among all visits} + +\item{ThresholdMinimalPtMeasDxRx}{The minimum percentage of patients with at least 1 Measurement, 1 Dx, and 1 Rx} + +\item{outputFolder}{(OPTIONAL, sql only mode) Path to store SQL files} + +\item{sqlOnly}{Boolean to determine if Heel should be fully executed. TRUE = just generate SQL files, don't actually run, FALSE = run Achilles Heel} } \value{ nothing is returned @@ -29,11 +47,16 @@ nothing is returned \code{achillesHeel} executes data quality rules (or checks) on pre-computed analyses (or measures). } \details{ -\code{achillesHeel} contains number of rules (authored in SQL) that are executed againts achilles results tables. +\code{achillesHeel} contains number of rules (authored in SQL) that are executed against achilles results tables. } \examples{ \dontrun{ - connectionDetails <- DatabaseConnector::createConnectionDetails(dbms="sql server", server="myserver") - achillesHeel <- achilles(connectionDetails, cdmDatabaseSchema="mycdm", resultsDatabaseSchema="scratch", vocabDatabaseSchema="vocabulary") + connectionDetails <- createConnectionDetails(dbms="sql server", server="some_server") + achillesHeel <- achillesHeel(connectionDetails = connectionDetails, + cdmDatabaseSchema = "cdm", + resultsDatabaseSchema = "results", + scratchDatabaseSchema = "scratch", + cdmVersion = "5.3.0", + numThreads = 10) } } diff --git a/man/addDatasource.Rd b/man/addDatasource.Rd old mode 100644 new mode 100755 index b3307da2..a0395d39 --- a/man/addDatasource.Rd +++ b/man/addDatasource.Rd @@ -32,10 +32,10 @@ If the datasources file exists, the data source will be added to the file. If the datasources file does not exist, a new file wil be initialized with the specified data source. 
} \examples{ -\dontrun{ -jsonFolderPath <- "your/output/path" -connectionDetails <- DatabaseConnector::createConnectionDetails(dbms="sql server", server="yourserver") -exportToJson(connectionDetails, cdmDatabaseSchema="cdm5", outputPath=jsonFolderPath) -addDatasource(jsonFolderPath, "your_data_name") -} + \dontrun{ + jsonFolderPath <- "your/output/path" + connectionDetails <- DatabaseConnector::createConnectionDetails(dbms="sql server", server="yourserver") + exportToJson(connectionDetails, cdmDatabaseSchema="cdm5", outputPath=jsonFolderPath) + addDatasource(jsonFolderPath, "your_data_name") + } } diff --git a/man/allReports.Rd b/man/allReports.Rd old mode 100644 new mode 100755 diff --git a/man/conceptHierarchy.Rd b/man/conceptHierarchy.Rd deleted file mode 100644 index 0e6f6db3..00000000 --- a/man/conceptHierarchy.Rd +++ /dev/null @@ -1,36 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/Achilles.R -\name{conceptHierarchy} -\alias{conceptHierarchy} -\title{execution of concept hierarchy creation} -\usage{ -conceptHierarchy(connectionDetails, vocabDatabaseSchema, - oracleTempSchema = vocabDatabaseSchema, - resultsDatabaseSchema = vocabDatabaseSchema, cdmVersion = "5") -} -\arguments{ -\item{connectionDetails}{An R object of type ConnectionDetail (details for the function that contains server info, database type, optionally username/password, port)} - -\item{vocabDatabaseSchema}{string name of database schema that contains OMOP Vocabulary. Default is vocabDatabaseSchema. On SQL Server, this should specifiy both the database and the schema, so for example 'results.dbo'.} - -\item{oracleTempSchema}{For Oracle only: the name of the database schema where you want all temporary tables to be managed. Requires create/insert permissions to this database.} - -\item{resultsDatabaseSchema}{string name of database schema that we can write results to. Default is vocabDatabaseSchema. On SQL Server, this should specifiy both the database and the schema, so for example 'results.dbo'.} - -\item{cdmVersion}{Define the OMOP CDM version used: currently support only "5". Default = "5"} -} -\value{ -nothing is returned -} -\description{ -\code{conceptHierarchy} executes script to create the concept_hierarchy table. -} -\details{ -\code{conceptHierarchy} executes script to create the concept_hierarchy table in the results schema, to be used by Atlas for treemap displays. -} -\examples{ -\dontrun{ - connectionDetails <- DatabaseConnector::createConnectionDetails(dbms="sql server", server="myserver") - conceptHierarchy <- conceptHierarchy(connectionDetails, resultsDatabaseSchema="scratch", vocabDatabaseSchema="vocabulary") -} -} diff --git a/man/createConceptHierarchy.Rd b/man/createConceptHierarchy.Rd new file mode 100755 index 00000000..a65e11d8 --- /dev/null +++ b/man/createConceptHierarchy.Rd @@ -0,0 +1,36 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Achilles.R +\name{createConceptHierarchy} +\alias{createConceptHierarchy} +\title{Create the concept hierarchy} +\usage{ +createConceptHierarchy(connectionDetails, resultsDatabaseSchema, + scratchDatabaseSchema, vocabDatabaseSchema, numThreads = 1, + tempAchillesPrefix = "tmpach", sqlOnly = FALSE) +} +\arguments{ +\item{connectionDetails}{An R object of type \code{connectionDetails} created using the function \code{createConnectionDetails} in the \code{DatabaseConnector} package.} + +\item{resultsDatabaseSchema}{Fully qualified name of database schema that we can write final results to. 
Default is cdmDatabaseSchema.
+On SQL Server, this should specify both the database and the schema, so for example, on SQL Server, 'cdm_results.dbo'.}
+
+\item{scratchDatabaseSchema}{Fully qualified name of the database schema that will store all of the intermediate scratch tables, so for example, on SQL Server, 'cdm_scratch.dbo'.
+Must be accessible to/from the cdmDatabaseSchema and the resultsDatabaseSchema. Default is resultsDatabaseSchema.
+Making this "#" will run Achilles in single-threaded mode and use temporary tables instead of permanent tables.}
+
+\item{vocabDatabaseSchema}{String name of database schema that contains OMOP Vocabulary. Default is cdmDatabaseSchema. On SQL Server, this should specify both the database and the schema, so for example 'results.dbo'.}
+
+\item{numThreads}{(OPTIONAL, multi-threaded mode) The number of threads to use to run Achilles in parallel. Default is 1 thread.}
+
+\item{tempAchillesPrefix}{(OPTIONAL, multi-threaded mode) The prefix to use for the scratch Achilles analyses tables. Default is "tmpach"}
+
+\item{sqlOnly}{TRUE = just generate SQL files, don't actually run, FALSE = run Achilles}
+}
+\description{
+Create the concept hierarchy
+}
+\details{
+Post-processing, create the concept hierarchy.
+Please note: this table creation only requires the Vocabulary, not the CDM itself.
+You could run this once for 1 Vocab version, and then copy the table to all CDMs using that Vocab.
+} diff --git a/man/createIndices.Rd b/man/createIndices.Rd old mode 100644 new mode 100755 index 761aba8d..51ded2b5 --- a/man/createIndices.Rd +++ b/man/createIndices.Rd @@ -2,35 +2,21 @@ % Please edit documentation in R/Achilles.R \name{createIndices} \alias{createIndices}
-\title{Create indices on Achilles results tables and concept hierarchy}
+\title{Create indices}
\usage{
-createIndices(connectionDetails, resultsDatabaseSchema,
- oracleTempSchema = resultsDatabaseSchema, sqlOnly = FALSE,
- cdmVersion = "5")
+createIndices(connectionDetails, resultsDatabaseSchema, sqlOnly = FALSE)
} \arguments{
-\item{connectionDetails}{An R object of type ConnectionDetail (details for the function that contains server info, database type, optionally username/password, port)}
+\item{connectionDetails}{An R object of type \code{connectionDetails} created using the function \code{createConnectionDetails} in the \code{DatabaseConnector} package.}
-\item{resultsDatabaseSchema}{string name of database schema that holds the results tables for indexing. On SQL Server, this should specifiy both the database and the schema, so for example 'results.dbo'.}
+\item{resultsDatabaseSchema}{Fully qualified name of database schema that we can write final results to. Default is cdmDatabaseSchema.
+On SQL Server, this should specify both the database and the schema, so for example, on SQL Server, 'cdm_results.dbo'.}
-\item{oracleTempSchema}{For Oracle only: the name of the database schema where you want all temporary tables to be managed. Requires create/insert permissions to this database.}
-
-\item{sqlOnly}{if TRUE, only the SQL code will be generated}
-
-\item{cdmVersion}{Define the OMOP CDM version used: currently support only "5". Default = "5"}
-}
-\value{
-nothing is returned
+\item{sqlOnly}{TRUE = just generate SQL files, don't actually run, FALSE = run Achilles}
} \description{
-\code{createIndices} executes script to create indicies on the Achilles tables.
+Create indices
} \details{
-\code{createIndices} executes script to create indicies on the Achilles tables.
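# Editor's sketch (not part of the diff): neither createConceptHierarchy nor createIndices
# ships an \examples section, so here is one hedged usage sketch based on the signatures
# shown above. Schema names are placeholders; connectionDetails is assumed to come from
# DatabaseConnector::createConnectionDetails().
createConceptHierarchy(connectionDetails = connectionDetails,
                       resultsDatabaseSchema = "results",
                       scratchDatabaseSchema = "scratch",
                       vocabDatabaseSchema = "vocab",
                       numThreads = 5)
createIndices(connectionDetails = connectionDetails,
              resultsDatabaseSchema = "results")  # per the details below, skip on Redshift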
-} -\examples{ -\dontrun{ - connectionDetails <- DatabaseConnector::createConnectionDetails(dbms="sql server", server="myserver") - createIndices <- createIndices(connectionDetails, resultsDatabaseSchema="scratch") -} +Post-processing, create indices to help performance. Cannot be used with Redshift. } diff --git a/man/dropAllScratchTables.Rd b/man/dropAllScratchTables.Rd new file mode 100755 index 00000000..cc453888 --- /dev/null +++ b/man/dropAllScratchTables.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Achilles.R +\name{dropAllScratchTables} +\alias{dropAllScratchTables} +\title{Drop all possible scratch tables} +\usage{ +dropAllScratchTables(connectionDetails, scratchDatabaseSchema, + tempAchillesPrefix = "tmpach", tempHeelPrefix = "tmpheel", + numThreads = 1, tableTypes = c("achilles", "heel")) +} +\arguments{ +\item{connectionDetails}{An R object of type \code{connectionDetails} created using the function \code{createConnectionDetails} in the \code{DatabaseConnector} package.} + +\item{scratchDatabaseSchema}{string name of database schema that Achilles scratch tables were written to.} + +\item{tempAchillesPrefix}{The prefix to use for the "temporary" (but actually permanent) Achilles analyses tables. Default is "tmpach"} + +\item{tempHeelPrefix}{The prefix to use for the "temporary" (but actually permanent) Heel tables. Default is "tmpheel"} + +\item{numThreads}{The number of threads to use to run this function. Default is 1 thread.} + +\item{tableTypes}{The types of Achilles scratch tables to drop: achilles or heel or both} +} +\description{ +Drop all possible scratch tables +} +\details{ +Drop all possible Achilles and Heel scratch tables +} diff --git a/man/exportConditionEraToJson.Rd b/man/exportConditionEraToJson.Rd old mode 100644 new mode 100755 index e2b15b0b..27a197ed --- a/man/exportConditionEraToJson.Rd +++ b/man/exportConditionEraToJson.Rd @@ -5,7 +5,7 @@ \title{exportConditionEraToJson} \usage{ exportConditionEraToJson(connectionDetails, cdmDatabaseSchema, - resultsDatabaseSchema, outputPath = getwd(), cdmVersion = "4", + resultsDatabaseSchema, outputPath = getwd(), vocabDatabaseSchema = cdmDatabaseSchema) } \arguments{ @@ -17,8 +17,6 @@ exportConditionEraToJson(connectionDetails, cdmDatabaseSchema, \item{outputPath}{A folder location to save the JSON files. Default is current working folder} -\item{cdmVersion}{Define the OMOP CDM version used: currently support "4" and "5". Default = "4"} - \item{vocabDatabaseSchema}{string name of database schema that contains OMOP Vocabulary. Default is cdmDatabaseSchema. On SQL Server, this should specifiy both the database and the schema, so for example 'results.dbo'.} } \value{ diff --git a/man/exportConditionToJson.Rd b/man/exportConditionToJson.Rd old mode 100644 new mode 100755 index ac06ded2..0f6cd84e --- a/man/exportConditionToJson.Rd +++ b/man/exportConditionToJson.Rd @@ -5,7 +5,7 @@ \title{exportConditionToJson} \usage{ exportConditionToJson(connectionDetails, cdmDatabaseSchema, - resultsDatabaseSchema, outputPath = getwd(), cdmVersion = "4", + resultsDatabaseSchema, outputPath = getwd(), vocabDatabaseSchema = cdmDatabaseSchema) } \arguments{ @@ -17,8 +17,6 @@ exportConditionToJson(connectionDetails, cdmDatabaseSchema, \item{outputPath}{A folder location to save the JSON files. Default is current working folder} -\item{cdmVersion}{Define the OMOP CDM version used: currently support "4" and "5". 
Default = "4"} - \item{vocabDatabaseSchema}{string name of database schema that contains OMOP Vocabulary. Default is cdmDatabaseSchema. On SQL Server, this should specifiy both the database and the schema, so for example 'results.dbo'.} } \value{ diff --git a/man/exportDashboardToJson.Rd b/man/exportDashboardToJson.Rd old mode 100644 new mode 100755 index 99370ac1..ddd33ec1 --- a/man/exportDashboardToJson.Rd +++ b/man/exportDashboardToJson.Rd @@ -5,7 +5,7 @@ \title{exportDashboardToJson} \usage{ exportDashboardToJson(connectionDetails, cdmDatabaseSchema, - resultsDatabaseSchema, outputPath = getwd(), cdmVersion = "4", + resultsDatabaseSchema, outputPath = getwd(), vocabDatabaseSchema = cdmDatabaseSchema) } \arguments{ @@ -17,8 +17,6 @@ exportDashboardToJson(connectionDetails, cdmDatabaseSchema, \item{outputPath}{A folder location to save the JSON files. Default is current working folder} -\item{cdmVersion}{Define the OMOP CDM version used: currently support "4" and "5". Default = "4"} - \item{vocabDatabaseSchema}{string name of database schema that contains OMOP Vocabulary. Default is cdmDatabaseSchema. On SQL Server, this should specifiy both the database and the schema, so for example 'results.dbo'.} } \value{ diff --git a/man/exportDataDensityToJson.Rd b/man/exportDataDensityToJson.Rd old mode 100644 new mode 100755 index 6061f2b2..de7e23a5 --- a/man/exportDataDensityToJson.Rd +++ b/man/exportDataDensityToJson.Rd @@ -5,7 +5,7 @@ \title{exportDataDensityToJson} \usage{ exportDataDensityToJson(connectionDetails, cdmDatabaseSchema, - resultsDatabaseSchema, outputPath = getwd(), cdmVersion = "4", + resultsDatabaseSchema, outputPath = getwd(), vocabDatabaseSchema = cdmDatabaseSchema) } \arguments{ @@ -17,8 +17,6 @@ exportDataDensityToJson(connectionDetails, cdmDatabaseSchema, \item{outputPath}{A folder location to save the JSON files. Default is current working folder} -\item{cdmVersion}{Define the OMOP CDM version used: currently support "4" and "5". Default = "4"} - \item{vocabDatabaseSchema}{string name of database schema that contains OMOP Vocabulary. Default is cdmDatabaseSchema. On SQL Server, this should specifiy both the database and the schema, so for example 'results.dbo'.} } \value{ diff --git a/man/exportDeathToJson.Rd b/man/exportDeathToJson.Rd old mode 100644 new mode 100755 index 7cf47f7f..3a9b5712 --- a/man/exportDeathToJson.Rd +++ b/man/exportDeathToJson.Rd @@ -5,8 +5,7 @@ \title{exportDeathToJson} \usage{ exportDeathToJson(connectionDetails, cdmDatabaseSchema, resultsDatabaseSchema, - outputPath = getwd(), cdmVersion = "4", - vocabDatabaseSchema = cdmDatabaseSchema) + outputPath = getwd(), vocabDatabaseSchema = cdmDatabaseSchema) } \arguments{ \item{connectionDetails}{An R object of type ConnectionDetail (details for the function that contains server info, database type, optionally username/password, port)} @@ -17,8 +16,6 @@ exportDeathToJson(connectionDetails, cdmDatabaseSchema, resultsDatabaseSchema, \item{outputPath}{A folder location to save the JSON files. Default is current working folder} -\item{cdmVersion}{Define the OMOP CDM version used: currently support "4" and "5". Default = "4"} - \item{vocabDatabaseSchema}{string name of database schema that contains OMOP Vocabulary. Default is cdmDatabaseSchema. 
On SQL Server, this should specifiy both the database and the schema, so for example 'results.dbo'.} } \value{ diff --git a/man/exportDrugEraToJson.Rd b/man/exportDrugEraToJson.Rd old mode 100644 new mode 100755 index 8c5ac003..2da66ded --- a/man/exportDrugEraToJson.Rd +++ b/man/exportDrugEraToJson.Rd @@ -5,8 +5,7 @@ \title{exportDrugEraToJson} \usage{ exportDrugEraToJson(connectionDetails, cdmDatabaseSchema, resultsDatabaseSchema, - outputPath = getwd(), cdmVersion = "4", - vocabDatabaseSchema = cdmDatabaseSchema) + outputPath = getwd(), vocabDatabaseSchema = cdmDatabaseSchema) } \arguments{ \item{connectionDetails}{An R object of type ConnectionDetail (details for the function that contains server info, database type, optionally username/password, port)} @@ -17,8 +16,6 @@ exportDrugEraToJson(connectionDetails, cdmDatabaseSchema, resultsDatabaseSchema, \item{outputPath}{A folder location to save the JSON files. Default is current working folder} -\item{cdmVersion}{Define the OMOP CDM version used: currently support "4" and "5". Default = "4"} - \item{vocabDatabaseSchema}{string name of database schema that contains OMOP Vocabulary. Default is cdmDatabaseSchema. On SQL Server, this should specifiy both the database and the schema, so for example 'results.dbo'.} } \value{ diff --git a/man/exportDrugToJson.Rd b/man/exportDrugToJson.Rd old mode 100644 new mode 100755 index 5e3a821f..dede5f57 --- a/man/exportDrugToJson.Rd +++ b/man/exportDrugToJson.Rd @@ -5,8 +5,7 @@ \title{exportDrugToJson} \usage{ exportDrugToJson(connectionDetails, cdmDatabaseSchema, resultsDatabaseSchema, - outputPath = getwd(), cdmVersion = "4", - vocabDatabaseSchema = cdmDatabaseSchema) + outputPath = getwd(), vocabDatabaseSchema = cdmDatabaseSchema) } \arguments{ \item{connectionDetails}{An R object of type ConnectionDetail (details for the function that contains server info, database type, optionally username/password, port)} @@ -17,8 +16,6 @@ exportDrugToJson(connectionDetails, cdmDatabaseSchema, resultsDatabaseSchema, \item{outputPath}{A folder location to save the JSON files. Default is current working folder} -\item{cdmVersion}{Define the OMOP CDM version used: currently support "4" and "5". Default = "4"} - \item{vocabDatabaseSchema}{string name of database schema that contains OMOP Vocabulary. Default is cdmDatabaseSchema. On SQL Server, this should specifiy both the database and the schema, so for example 'results.dbo'.} } \value{ diff --git a/man/exportHeelToJson.Rd b/man/exportHeelToJson.Rd old mode 100644 new mode 100755 index 9208b20b..6cec4240 --- a/man/exportHeelToJson.Rd +++ b/man/exportHeelToJson.Rd @@ -5,8 +5,7 @@ \title{exportHeelToJson} \usage{ exportHeelToJson(connectionDetails, cdmDatabaseSchema, resultsDatabaseSchema, - outputPath = getwd(), cdmVersion = "4", - vocabDatabaseSchema = cdmDatabaseSchema) + outputPath = getwd(), vocabDatabaseSchema = cdmDatabaseSchema) } \arguments{ \item{connectionDetails}{An R object of type ConnectionDetail (details for the function that contains server info, database type, optionally username/password, port)} @@ -17,8 +16,6 @@ exportHeelToJson(connectionDetails, cdmDatabaseSchema, resultsDatabaseSchema, \item{outputPath}{A folder location to save the JSON files. Default is current working folder} -\item{cdmVersion}{Define the OMOP CDM version used: currently support "4" and "5". Default = "4"} - \item{vocabDatabaseSchema}{string name of database schema that contains OMOP Vocabulary. Default is cdmDatabaseSchema. 
On SQL Server, this should specifiy both the database and the schema, so for example 'results.dbo'.} } \value{ diff --git a/man/exportMeasurementToJson.Rd b/man/exportMeasurementToJson.Rd old mode 100644 new mode 100755 index 3b78e3b7..f7dc9141 --- a/man/exportMeasurementToJson.Rd +++ b/man/exportMeasurementToJson.Rd @@ -5,7 +5,7 @@ \title{exportMeasurementToJson} \usage{ exportMeasurementToJson(connectionDetails, cdmDatabaseSchema, - resultsDatabaseSchema, outputPath = getwd(), cdmVersion = "4", + resultsDatabaseSchema, outputPath = getwd(), vocabDatabaseSchema = cdmDatabaseSchema) } \arguments{ @@ -17,8 +17,6 @@ exportMeasurementToJson(connectionDetails, cdmDatabaseSchema, \item{outputPath}{A folder location to save the JSON files. Default is current working folder} -\item{cdmVersion}{Define the OMOP CDM version used: currently support "4" and "5". Default = "4"} - \item{vocabDatabaseSchema}{string name of database schema that contains OMOP Vocabulary. Default is cdmDatabaseSchema. On SQL Server, this should specifiy both the database and the schema, so for example 'results.dbo'.} } \value{ diff --git a/man/exportMetaToJson.Rd b/man/exportMetaToJson.Rd old mode 100644 new mode 100755 index d926661a..779a23cd --- a/man/exportMetaToJson.Rd +++ b/man/exportMetaToJson.Rd @@ -5,8 +5,7 @@ \title{exportMetaToJson} \usage{ exportMetaToJson(connectionDetails, cdmDatabaseSchema, resultsDatabaseSchema, - outputPath = getwd(), cdmVersion = "4", - vocabDatabaseSchema = cdmDatabaseSchema) + outputPath = getwd(), vocabDatabaseSchema = cdmDatabaseSchema) } \arguments{ \item{connectionDetails}{An R object of type ConnectionDetail (details for the function that contains server info, database type, optionally username/password, port)} @@ -17,8 +16,6 @@ exportMetaToJson(connectionDetails, cdmDatabaseSchema, resultsDatabaseSchema, \item{outputPath}{A folder location to save the JSON files. Default is current working folder} -\item{cdmVersion}{Define the OMOP CDM version used: currently support "4" and "5". Default = "4"} - \item{vocabDatabaseSchema}{string name of database schema that contains OMOP Vocabulary. Default is cdmDatabaseSchema. On SQL Server, this should specifiy both the database and the schema, so for example 'results.dbo'.} } \value{ diff --git a/man/exportObservationPeriodToJson.Rd b/man/exportObservationPeriodToJson.Rd old mode 100644 new mode 100755 index 459dddbe..dad2757e --- a/man/exportObservationPeriodToJson.Rd +++ b/man/exportObservationPeriodToJson.Rd @@ -5,7 +5,7 @@ \title{exportObservationPeriodToJson} \usage{ exportObservationPeriodToJson(connectionDetails, cdmDatabaseSchema, - resultsDatabaseSchema, outputPath = getwd(), cdmVersion = "4", + resultsDatabaseSchema, outputPath = getwd(), vocabDatabaseSchema = cdmDatabaseSchema) } \arguments{ @@ -17,8 +17,6 @@ exportObservationPeriodToJson(connectionDetails, cdmDatabaseSchema, \item{outputPath}{A folder location to save the JSON files. Default is current working folder} -\item{cdmVersion}{Define the OMOP CDM version used: currently support "4" and "5". Default = "4"} - \item{vocabDatabaseSchema}{string name of database schema that contains OMOP Vocabulary. Default is cdmDatabaseSchema. 
On SQL Server, this should specifiy both the database and the schema, so for example 'results.dbo'.} } \value{ diff --git a/man/exportObservationToJson.Rd b/man/exportObservationToJson.Rd old mode 100644 new mode 100755 index d008be9c..1aefd3da --- a/man/exportObservationToJson.Rd +++ b/man/exportObservationToJson.Rd @@ -5,7 +5,7 @@ \title{exportObservationToJson} \usage{ exportObservationToJson(connectionDetails, cdmDatabaseSchema, - resultsDatabaseSchema, outputPath = getwd(), cdmVersion = "4", + resultsDatabaseSchema, outputPath = getwd(), vocabDatabaseSchema = cdmDatabaseSchema) } \arguments{ @@ -17,8 +17,6 @@ exportObservationToJson(connectionDetails, cdmDatabaseSchema, \item{outputPath}{A folder location to save the JSON files. Default is current working folder} -\item{cdmVersion}{Define the OMOP CDM version used: currently support "4" and "5". Default = "4"} - \item{vocabDatabaseSchema}{string name of database schema that contains OMOP Vocabulary. Default is cdmDatabaseSchema. On SQL Server, this should specifiy both the database and the schema, so for example 'results.dbo'.} } \value{ diff --git a/man/exportPersonToJson.Rd b/man/exportPersonToJson.Rd old mode 100644 new mode 100755 index 245e9960..e9c93e68 --- a/man/exportPersonToJson.Rd +++ b/man/exportPersonToJson.Rd @@ -5,8 +5,7 @@ \title{exportPersonToJson} \usage{ exportPersonToJson(connectionDetails, cdmDatabaseSchema, resultsDatabaseSchema, - outputPath = getwd(), cdmVersion = "4", - vocabDatabaseSchema = cdmDatabaseSchema) + outputPath = getwd(), vocabDatabaseSchema = cdmDatabaseSchema) } \arguments{ \item{connectionDetails}{An R object of type ConnectionDetail (details for the function that contains server info, database type, optionally username/password, port)} @@ -17,8 +16,6 @@ exportPersonToJson(connectionDetails, cdmDatabaseSchema, resultsDatabaseSchema, \item{outputPath}{A folder location to save the JSON files. Default is current working folder} -\item{cdmVersion}{Define the OMOP CDM version used: currently support "4" and "5". Default = "4"} - \item{vocabDatabaseSchema}{string name of database schema that contains OMOP Vocabulary. Default is cdmDatabaseSchema. On SQL Server, this should specifiy both the database and the schema, so for example 'results.dbo'.} } \value{ diff --git a/man/exportProcedureToJson.Rd b/man/exportProcedureToJson.Rd old mode 100644 new mode 100755 index 520a2695..da467392 --- a/man/exportProcedureToJson.Rd +++ b/man/exportProcedureToJson.Rd @@ -5,7 +5,7 @@ \title{exportProcedureToJson} \usage{ exportProcedureToJson(connectionDetails, cdmDatabaseSchema, - resultsDatabaseSchema, outputPath = getwd(), cdmVersion = "4", + resultsDatabaseSchema, outputPath = getwd(), vocabDatabaseSchema = cdmDatabaseSchema) } \arguments{ @@ -17,8 +17,6 @@ exportProcedureToJson(connectionDetails, cdmDatabaseSchema, \item{outputPath}{A folder location to save the JSON files. Default is current working folder} -\item{cdmVersion}{Define the OMOP CDM version used: currently support "4" and "5". Default = "4"} - \item{vocabDatabaseSchema}{string name of database schema that contains OMOP Vocabulary. Default is cdmDatabaseSchema. 
On SQL Server, this should specifiy both the database and the schema, so for example 'results.dbo'.} } \value{ diff --git a/man/exportToJson.Rd b/man/exportToJson.Rd old mode 100644 new mode 100755 index 82b8885a..e52f42f8 --- a/man/exportToJson.Rd +++ b/man/exportToJson.Rd @@ -5,8 +5,8 @@ \title{exportToJson} \usage{ exportToJson(connectionDetails, cdmDatabaseSchema, resultsDatabaseSchema, - outputPath = getwd(), reports = allReports, cdmVersion = "4", - vocabDatabaseSchema = cdmDatabaseSchema) + outputPath = getwd(), reports = allReports, + vocabDatabaseSchema = cdmDatabaseSchema, compressIntoOneFile = FALSE) } \arguments{ \item{connectionDetails}{An R object of type ConnectionDetail (details for the function that contains server info, database type, optionally username/password, port)} @@ -19,9 +19,10 @@ exportToJson(connectionDetails, cdmDatabaseSchema, resultsDatabaseSchema, \item{reports}{A character vector listing the set of reports to generate. Default is all reports.} -\item{cdmVersion}{Define the OMOP CDM version used: currently support "4" and "5". Default = "4"} +\item{vocabDatabaseSchema}{string name of database schema that contains OMOP Vocabulary. Default is cdmDatabaseSchema. On SQL Server, this should specifiy both the database and the schema, so for example 'results.dbo'.} + +\item{compressIntoOneFile}{Boolean indicating if the JSON files should be compressed into one zip file -\item{vocabDatabaseSchema}{string name of database schema that contains OMOP Vocabulary. Default is cdmDatabaseSchema. On SQL Server, this should specifiy both the database and the schema, so for example 'results.dbo'. See \code{data(allReports)} for a list of all report types} } \value{ diff --git a/man/exportVisitToJson.Rd b/man/exportVisitToJson.Rd old mode 100644 new mode 100755 index ff653bff..010923e9 --- a/man/exportVisitToJson.Rd +++ b/man/exportVisitToJson.Rd @@ -5,8 +5,7 @@ \title{exportVisitToJson} \usage{ exportVisitToJson(connectionDetails, cdmDatabaseSchema, resultsDatabaseSchema, - outputPath = getwd(), cdmVersion = "4", - vocabDatabaseSchema = cdmDatabaseSchema) + outputPath = getwd(), vocabDatabaseSchema = cdmDatabaseSchema) } \arguments{ \item{connectionDetails}{An R object of type ConnectionDetail (details for the function that contains server info, database type, optionally username/password, port)} @@ -17,8 +16,6 @@ exportVisitToJson(connectionDetails, cdmDatabaseSchema, resultsDatabaseSchema, \item{outputPath}{A folder location to save the JSON files. Default is current working folder} -\item{cdmVersion}{Define the OMOP CDM version used: currently support "4" and "5". Default = "4"} - \item{vocabDatabaseSchema}{string name of database schema that contains OMOP Vocabulary. Default is cdmDatabaseSchema. 
On SQL Server, this should specifiy both the database and the schema, so for example 'results.dbo'.} } \value{ diff --git a/man/fetchAchillesAnalysisResults.Rd b/man/fetchAchillesAnalysisResults.Rd old mode 100644 new mode 100755 index efb4242a..dc0fc34b --- a/man/fetchAchillesAnalysisResults.Rd +++ b/man/fetchAchillesAnalysisResults.Rd @@ -4,12 +4,14 @@ \alias{fetchAchillesAnalysisResults} \title{fetchAchillesAnalysisResults} \usage{ -fetchAchillesAnalysisResults(connectionDetails, resultsDatabase, analysisId) +fetchAchillesAnalysisResults(connectionDetails, resultsDatabaseSchema, + analysisId) } \arguments{ -\item{connectionDetails}{An R object of type ConnectionDetail (details for the function that contains server info, database type, optionally username/password, port)} +\item{connectionDetails}{An R object of type \code{connectionDetails} created using the function \code{createConnectionDetails} in the \code{DatabaseConnector} package.} -\item{resultsDatabase}{Name of database containing the Achilles descriptive statistics.} +\item{resultsDatabaseSchema}{Fully qualified name of database schema that we can fetch final results from. +On SQL Server, this should specifiy both the database and the schema, so for example, on SQL Server, 'cdm_results.dbo'.} \item{analysisId}{A single analysisId} } @@ -23,9 +25,9 @@ An object of type \code{achillesAnalysisResults} See \code{data(analysesDetails)} for a list of all Achilles analyses and their Ids. } \examples{ -\dontrun{ - connectionDetails <- DatabaseConnector::createConnectionDetails(dbms="sql server", server="myserver") - achillesResults <- achilles(connectionDetails, "cdm4_sim", "scratch", "TestDB") - fetchAchillesAnalysisResults(connectionDetails, "scratch",106) -} + \dontrun{ + connectionDetails <- DatabaseConnector::createConnectionDetails(dbms="sql server", server="myserver") + achillesResults <- achilles(connectionDetails, "cdm4_sim", "scratch", "TestDB") + fetchAchillesAnalysisResults(connectionDetails, "scratch",106) + } } diff --git a/man/fetchAchillesHeelResults.Rd b/man/fetchAchillesHeelResults.Rd old mode 100644 new mode 100755 index 4d7ddb9f..e5436890 --- a/man/fetchAchillesHeelResults.Rd +++ b/man/fetchAchillesHeelResults.Rd @@ -4,12 +4,13 @@ \alias{fetchAchillesHeelResults} \title{fetchAchillesHeelResults} \usage{ -fetchAchillesHeelResults(connectionDetails, resultsDatabase) +fetchAchillesHeelResults(connectionDetails, resultsDatabaseSchema) } \arguments{ -\item{connectionDetails}{An R object of type ConnectionDetail (details for the function that contains server info, database type, optionally username/password, port)} +\item{connectionDetails}{An R object of type \code{connectionDetails} created using the function \code{createConnectionDetails} in the \code{DatabaseConnector} package.} -\item{resultsDatabase}{Name of database containing the Achilles descriptive statistics.} +\item{resultsDatabaseSchema}{Fully qualified name of database schema that we can fetch final results from. +On SQL Server, this should specifiy both the database and the schema, so for example, on SQL Server, 'cdm_results.dbo'.} } \value{ A table listing all identified issues @@ -22,9 +23,9 @@ AchillesHeel is a part of the Achilles analysis aimed at identifying potential d that should really be fixed) and warnings (things that should at least be investigated). 
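# Editor's sketch (not part of the diff): splitting the Heel output into errors and warnings.
# The ACHILLES_HEEL_warning column name and the "ERROR:"/"WARNING:" message prefixes are
# assumptions about the Heel results table, not something stated in this changeset.
heelResults <- fetchAchillesHeelResults(connectionDetails, resultsDatabaseSchema = "results")
heelErrors <- heelResults[grepl("^ERROR", heelResults$ACHILLES_HEEL_warning), ]
heelWarnings <- heelResults[grepl("^WARNING", heelResults$ACHILLES_HEEL_warning), ]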
} \examples{ -\dontrun{ - connectionDetails <- DatabaseConnector::createConnectionDetails(dbms="sql server", server="myserver") - achillesResults <- achilles(connectionDetails, "cdm4_sim", "scratch", "TestDB") - fetchAchillesHeelResults(connectionDetails, "scratch") -} + \dontrun{ + connectionDetails <- DatabaseConnector::createConnectionDetails(dbms="sql server", server="myserver") + achillesResults <- achilles(connectionDetails, "cdm5_sim", "scratch", "TestDB") + fetchAchillesHeelResults(connectionDetails, "scratch") + } } diff --git a/man/getAnalysisDetails.Rd b/man/getAnalysisDetails.Rd old mode 100644 new mode 100755 diff --git a/man/showReportTypes.Rd b/man/showReportTypes.Rd old mode 100644 new mode 100755 diff --git a/man/validateSchema.Rd b/man/validateSchema.Rd new file mode 100755 index 00000000..9f5eb4af --- /dev/null +++ b/man/validateSchema.Rd @@ -0,0 +1,30 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Achilles.R +\name{validateSchema} +\alias{validateSchema} +\title{Validate the CDM schema} +\usage{ +validateSchema(connectionDetails, cdmDatabaseSchema, + resultsDatabaseSchema = cdmDatabaseSchema, cdmVersion, runCostAnalysis, + sqlOnly = FALSE) +} +\arguments{ +\item{connectionDetails}{An R object of type \code{connectionDetails} created using the function \code{createConnectionDetails} in the \code{DatabaseConnector} package.} + +\item{cdmDatabaseSchema}{string name of database schema that contains OMOP CDM. On SQL Server, this should specifiy both the database and the schema, so for example 'cdm_instance.dbo'.} + +\item{resultsDatabaseSchema}{Fully qualified name of database schema that the cohort table is written to. Default is cdmDatabaseSchema. +On SQL Server, this should specifiy both the database and the schema, so for example, on SQL Server, 'cdm_results.dbo'.} + +\item{cdmVersion}{Define the OMOP CDM version used: currently supports v5 and above. Use major release number or minor number only (e.g. 5, 5.3)} + +\item{runCostAnalysis}{Boolean to determine if cost analysis should be run. 
Note: only works on CDM v5 and v5.1.0+ style cost tables.} + +\item{sqlOnly}{TRUE = just generate SQL files, don't actually run, FALSE = run Achilles} +} +\description{ +Validate the CDM schema +} +\details{ +Runs a validation script to ensure the CDM is valid based on v5.x +} diff --git a/tests/testthat.R b/tests/testthat.R old mode 100644 new mode 100755 diff --git a/tests/testthat/test-1-achilles_postgresql.R b/tests/testthat/test-1-achilles_postgresql.R deleted file mode 100644 index 656b890a..00000000 --- a/tests/testthat/test-1-achilles_postgresql.R +++ /dev/null @@ -1,30 +0,0 @@ -library(testthat) - -# These tests currently just check if the main achilles function and export functiosn don't throw any errors on the different platforms -# Note: Currently only checking CDM v5 - -test_that("Achilles main does not throw an error on Postgres", { - # Postgresql - if (Sys.getenv("CDM5_POSTGRESQL_USER") != "") { - details <- createConnectionDetails(dbms = "postgresql", - user = Sys.getenv("CDM5_POSTGRESQL_USER"), - password = URLdecode(Sys.getenv("CDM5_POSTGRESQL_PASSWORD")), - server = Sys.getenv("CDM5_POSTGRESQL_SERVER")) - try(result <- achilles(details, - cdmDatabaseSchema = Sys.getenv("CDM5_POSTGRESQL_CDM_SCHEMA"), - resultsDatabaseSchema = Sys.getenv("CDM5_POSTGRESQL_OHDSI_SCHEMA"), - sourceName = "NHANES", - cdmVersion = "5", - validateSchema = FALSE, - createTable = TRUE, - conceptHierarchy = FALSE, - createIndices = FALSE)) - if (file.exists("errorReport.txt")){ - writeLines(readChar("errorReport.txt", file.info("errorReport.txt")$size)) - } - expect_true(class(result) == "achillesResults") - } else { - writeLines("Skipping postgress main test") - } -}) - diff --git a/tests/testthat/test-2-achilles_mssql.R b/tests/testthat/test-2-achilles_mssql.R deleted file mode 100644 index 2c6d4f65..00000000 --- a/tests/testthat/test-2-achilles_mssql.R +++ /dev/null @@ -1,30 +0,0 @@ -library(testthat) - -# These tests currently just check if the main achilles function and export functiosn don't throw any errors on the different platforms -# Note: Currently only checking CDM v5 - -test_that("Achilles main does not throw an error on SQL Server", { - # SQL Server - if (Sys.getenv("CDM5_SQL_SERVER_USER") != "") { - details <- createConnectionDetails(dbms = "sql server", - user = Sys.getenv("CDM5_SQL_SERVER_USER"), - password = URLdecode(Sys.getenv("CDM5_SQL_SERVER_PASSWORD")), - server = Sys.getenv("CDM5_SQL_SERVER_SERVER")) - try(result <- achilles(details, - cdmDatabaseSchema = Sys.getenv("CDM5_SQL_SERVER_CDM_SCHEMA"), - resultsDatabaseSchema = Sys.getenv("CDM5_SQL_SERVER_OHDSI_SCHEMA"), - sourceName = "NHANES", - cdmVersion = "5", - validateSchema = FALSE, - createTable = TRUE, - conceptHierarchy = FALSE, - createIndices = FALSE)) - if (file.exists("errorReport.txt")){ - writeLines(readChar("errorReport.txt", file.info("errorReport.txt")$size)) - } - expect_true(class(result) == "achillesResults") - } else { - writeLines("Skipping sql server main test") - } -}) - diff --git a/tests/testthat/test-3-achilles_oracle.R b/tests/testthat/test-3-achilles_oracle.R deleted file mode 100644 index c27970c8..00000000 --- a/tests/testthat/test-3-achilles_oracle.R +++ /dev/null @@ -1,30 +0,0 @@ -library(testthat) - -# These tests currently just check if the main achilles function and export functiosn don't throw any errors on the different platforms -# Note: Currently only checking CDM v5 - -test_that("Achilles main does not throw an error on Oracle", { - # Oracle - if (Sys.getenv("CDM5_ORACLE_USER") != "") { 
- details <- createConnectionDetails(dbms = "oracle", - user = Sys.getenv("CDM5_ORACLE_USER"), - password = URLdecode(Sys.getenv("CDM5_ORACLE_PASSWORD")), - server = Sys.getenv("CDM5_ORACLE_SERVER")) - try(result <- achilles(details, - cdmDatabaseSchema = Sys.getenv("CDM5_ORACLE_CDM_SCHEMA"), - resultsDatabaseSchema = Sys.getenv("CDM5_ORACLE_OHDSI_SCHEMA"), - oracleTempSchema = Sys.getenv("CDM5_ORACLE_OHDSI_SCHEMA"), - sourceName = "NHANES", - cdmVersion = "5", - validateSchema = FALSE, - createTable = TRUE, - conceptHierarchy = FALSE, - createIndices = FALSE)) - if (file.exists("errorReport.txt")){ - writeLines(readChar("errorReport.txt", file.info("errorReport.txt")$size)) - } - expect_true(class(result) == "achillesResults") - } else { - writeLines("Skipping oracle main test") - } -}) diff --git a/tests/testthat/test-4-export_postgresql.R b/tests/testthat/test-4-export_postgresql.R deleted file mode 100644 index 2a8a56af..00000000 --- a/tests/testthat/test-4-export_postgresql.R +++ /dev/null @@ -1,26 +0,0 @@ -library(testthat) - -# These tests currently just check if the main achilles function and export functiosn don't throw any errors on the different platforms -# Note: Currently only checking CDM v5 - -test_that("Achilles export does not throw an error on Postgres", { - # Postgresql - if (Sys.getenv("CDM5_POSTGRESQL_USER") != "") { - details <- createConnectionDetails(dbms = "postgresql", - user = Sys.getenv("CDM5_POSTGRESQL_USER"), - password = URLdecode(Sys.getenv("CDM5_POSTGRESQL_PASSWORD")), - server = Sys.getenv("CDM5_POSTGRESQL_SERVER")) - try(exportToJson(details, - cdmDatabaseSchema = Sys.getenv("CDM5_POSTGRESQL_CDM_SCHEMA"), - resultsDatabaseSchema = Sys.getenv("CDM5_POSTGRESQL_OHDSI_SCHEMA"), - outputPath = "postgresql", - cdmVersion = "5")) - if (file.exists("errorReport.txt")){ - writeLines(readChar("errorReport.txt", file.info("errorReport.txt")$size)) - } - # dashboard.json is the last report to be generated: - expect_true(file.exists("postgresql/dashboard.json")) - } else { - writeLines("Skipping postgress export test") - } -}) diff --git a/tests/testthat/test-5-export_mssql.R b/tests/testthat/test-5-export_mssql.R deleted file mode 100644 index ba16a151..00000000 --- a/tests/testthat/test-5-export_mssql.R +++ /dev/null @@ -1,27 +0,0 @@ -library(testthat) - -# These tests currently just check if the main achilles function and export functiosn don't throw any errors on the different platforms -# Note: Currently only checking CDM v5 - - -test_that("Achilles export does not throw an error on SQL Server", { - # SQL Server - if (Sys.getenv("CDM5_SQL_SERVER_USER") != "") { - details <- createConnectionDetails(dbms = "sql server", - user = Sys.getenv("CDM5_SQL_SERVER_USER"), - password = URLdecode(Sys.getenv("CDM5_SQL_SERVER_PASSWORD")), - server = Sys.getenv("CDM5_SQL_SERVER_SERVER")) - try(exportToJson(details, - cdmDatabaseSchema = Sys.getenv("CDM5_SQL_SERVER_CDM_SCHEMA"), - resultsDatabaseSchema = Sys.getenv("CDM5_SQL_SERVER_OHDSI_SCHEMA"), - outputPath = "sql_server", - cdmVersion = "5")) - if (file.exists("errorReport.txt")){ - writeLines(readChar("errorReport.txt", file.info("errorReport.txt")$size)) - } - # dashboard.json is the last report to be generated: - expect_true(file.exists("sql_server/dashboard.json")) - } else { - writeLines("Skipping sql server export test") - } -}) diff --git a/tests/testthat/test-6-export_oracle.R b/tests/testthat/test-6-export_oracle.R deleted file mode 100644 index 75155b15..00000000 --- a/tests/testthat/test-6-export_oracle.R +++ 
/dev/null @@ -1,26 +0,0 @@ -library(testthat) - -# These tests currently just check if the main achilles function and export functiosn don't throw any errors on the different platforms -# Note: Currently only checking CDM v5 - -test_that("Achilles export does not throw an error on Oracle", { - # Oracle - if (Sys.getenv("CDM5_ORACLE_USER") != "") { - details <- createConnectionDetails(dbms = "oracle", - user = Sys.getenv("CDM5_ORACLE_USER"), - password = URLdecode(Sys.getenv("CDM5_ORACLE_PASSWORD")), - server = Sys.getenv("CDM5_ORACLE_SERVER")) - try(exportToJson(details, - cdmDatabaseSchema = Sys.getenv("CDM5_ORACLE_CDM_SCHEMA"), - resultsDatabaseSchema = Sys.getenv("CDM5_ORACLE_OHDSI_SCHEMA"), - outputPath = "oracle", - cdmVersion = "5")) - if (file.exists("errorReport.txt")){ - writeLines(readChar("errorReport.txt", file.info("errorReport.txt")$size)) - } - # dashboard.json is the last report to be generated: - expect_true(file.exists("oracle/dashboard.json")) - } else { - writeLines("Skipping oracle export test") - } -}) diff --git a/tests/testthat/test-7-achilles_bigquery.R b/tests/testthat/test-7-achilles_bigquery.R deleted file mode 100644 index 28b483b2..00000000 --- a/tests/testthat/test-7-achilles_bigquery.R +++ /dev/null @@ -1,30 +0,0 @@ -library(testthat) - -# These tests currently just check if the main achilles function and export functiosn don't throw any errors on the different platforms -# Note: Currently only checking CDM v5 - -test_that("Achilles main does not throw an error on BigQuery", { - # BigQuery - if (Sys.getenv("CDM5_BIGQUERY_USER") != "") { - details <- createConnectionDetails(dbms = "bigquery", - user = Sys.getenv("CDM5_BIGQUERY_USER"), - password = URLdecode(Sys.getenv("CDM5_BIGQUERY_PASSWORD")), - server = Sys.getenv("CDM5_BIGQUERY_SERVER"), - extraSettings = Sys.getenv("CDM5_BIGQUERY_EXTRA_SETTINGS")) - try(result <- achilles(details, - cdmDatabaseSchema = Sys.getenv("CDM5_BIGQUERY_CDM_SCHEMA"), - resultsDatabaseSchema = Sys.getenv("CDM5_BIGQUERY_OHDSI_SCHEMA"), - sourceName = "OHDSI CDM V5 Database", - cdmVersion = "5", - validateSchema = FALSE, - createTable = TRUE, - conceptHierarchy = FALSE)) - if (file.exists("errorReport.txt")){ - writeLines(readChar("errorReport.txt", file.info("errorReport.txt")$size)) - } - expect_true(class(result) == "achillesResults") - } else { - writeLines("Skipping bigquery main test") - } -}) - diff --git a/tests/testthat/test-8-export_bigquery.R b/tests/testthat/test-8-export_bigquery.R deleted file mode 100644 index 69314727..00000000 --- a/tests/testthat/test-8-export_bigquery.R +++ /dev/null @@ -1,27 +0,0 @@ -library(testthat) - -# These tests currently just check if the main achilles function and export functions don't throw any errors on the different platforms -# Note: Currently only checking CDM v5 - -test_that("Achilles export does not throw an error on BigQuery", { - # BigQuery - if (Sys.getenv("CDM5_BIGQUERY_USER") != "") { - details <- createConnectionDetails(dbms = "bigquery", - user = Sys.getenv("CDM5_BIGQUERY_USER"), - password = URLdecode(Sys.getenv("CDM5_BIGQUERY_PASSWORD")), - server = Sys.getenv("CDM5_BIGQUERY_SERVER"), - extraSettings = Sys.getenv("CDM5_BIGQUERY_EXTRA_SETTINGS")) - try(exportToJson(details, - cdmDatabaseSchema = Sys.getenv("CDM5_BIGQUERY_CDM_SCHEMA"), - resultsDatabaseSchema = Sys.getenv("CDM5_BIGQUERY_OHDSI_SCHEMA"), - outputPath = "bigquery", - cdmVersion = "5")) - if (file.exists("errorReport.txt")){ - writeLines(readChar("errorReport.txt", file.info("errorReport.txt")$size)) - } - # 
dashboard.json is the last report to be generated: - expect_true(file.exists("bigquery/dashboard.json")) - } else { - writeLines("Skipping bigquery export test") - } -}) diff --git a/tests/testthat/test-achilles_parameterized.R b/tests/testthat/test-achilles_parameterized.R new file mode 100644 index 00000000..22d41b65 --- /dev/null +++ b/tests/testthat/test-achilles_parameterized.R @@ -0,0 +1,53 @@
+library(testthat)
+
+# These tests currently just check if the main achilles function throws any errors on the different platforms and with single- and multi-threaded execution
+
+dbTypes = c("mysql",
+ "oracle",
+ "postgresql",
+ "redshift",
+ "sql server",
+ "pdw",
+ "netezza",
+ "bigquery")
+
+for (dbType in dbTypes) {
+ for (numThreads in c(1, 3)) {
+ test_that(sprintf("Achilles main with %d threads does not throw an error on %s", numThreads, dbType), {
+ sysUser <- Sys.getenv(sprintf("CDM5_%s_USER", toupper(dbType)))
+ sysPassword <- URLdecode(Sys.getenv(sprintf("CDM5_%s_PASSWORD", toupper(dbType))))
+ sysServer <- Sys.getenv(sprintf("CDM5_%s_SERVER", toupper(dbType)))
+ sysExtraSettings <- Sys.getenv(sprintf("CDM5_%s_EXTRA_SETTINGS", toupper(dbType)))
+ if (sysUser != "" &
+ sysPassword != "" &
+ sysServer != "") {
+ cdmDatabaseSchema <- Sys.getenv(sprintf("CDM5_%s_CDM_SCHEMA", toupper(dbType)))
+ resultsDatabaseSchema <- Sys.getenv(sprintf("CDM5_%s_OHDSI_SCHEMA", toupper(dbType)))
+
+ details <- createConnectionDetails(dbms = dbType,
+ user = sysUser,
+ password = sysPassword,
+ server = sysServer,
+ extraSettings = sysExtraSettings)
+ try(result <- Achilles::achilles(details,
+ cdmDatabaseSchema = cdmDatabaseSchema,
+ resultsDatabaseSchema = resultsDatabaseSchema,
+ scratchDatabaseSchema = resultsDatabaseSchema,
+ sourceName = "NHANES",
+ cdmVersion = "5",
+ numThreads = numThreads,
+ dropScratchTables = TRUE,
+ validateSchema = FALSE,
+ createTable = TRUE,
+ conceptHierarchy = FALSE,
+ createIndices = FALSE))
+ if (file.exists("errorReport.txt")) {
+ writeLines(readChar("errorReport.txt", file.info("errorReport.txt")$size))
+ }
+ expect_true(class(result) == "achillesResults")
+ } else {
+ writeLines(sprintf("Skipping %s main test", dbType))
+ }
+ })
+ }
+} diff --git a/tests/testthat/test_export_parameterized.R b/tests/testthat/test_export_parameterized.R new file mode 100644 index 00000000..d21c99d0 --- /dev/null +++ b/tests/testthat/test_export_parameterized.R @@ -0,0 +1,48 @@
+library(testthat)
+
+# These tests currently just check if the export to JSON function throws any errors on the different platforms
+
+dbTypes = c("mysql",
+ "oracle",
+ "postgresql",
+ "redshift",
+ "sql server",
+ "pdw",
+ "netezza",
+ "bigquery")
+
+for (dbType in dbTypes)
+{
+ test_that(sprintf("ExportToJson does not throw an error on %s", dbType), {
+ sysUser <- Sys.getenv(sprintf("CDM5_%s_USER", toupper(dbType)))
+ sysPassword <- URLdecode(Sys.getenv(sprintf("CDM5_%s_PASSWORD", toupper(dbType))))
+ sysServer <- Sys.getenv(sprintf("CDM5_%s_SERVER", toupper(dbType)))
+ sysExtraSettings <- Sys.getenv(sprintf("CDM5_%s_EXTRA_SETTINGS", toupper(dbType)))
+ if (sysUser != "" &
+ sysPassword != "" &
+ sysServer != "") {
+ cdmDatabaseSchema <- Sys.getenv(sprintf("CDM5_%s_CDM_SCHEMA", toupper(dbType)))
+ resultsDatabaseSchema <- Sys.getenv(sprintf("CDM5_%s_OHDSI_SCHEMA", toupper(dbType)))
+
+ details <- createConnectionDetails(dbms = dbType,
+ user = sysUser,
+ password = sysPassword,
+ server = sysServer,
+ extraSettings = sysExtraSettings)
+ try(exportToJson(details,
+ cdmDatabaseSchema = cdmDatabaseSchema,
+ resultsDatabaseSchema = resultsDatabaseSchema,
+ outputPath = dbType))
+ if (file.exists("errorReport.txt")){
+ writeLines(readChar("errorReport.txt", file.info("errorReport.txt")$size))
+ }
+ # dashboard.json is the last report to be generated:
+ expect_true(file.exists(file.path(dbType, "dashboard.json")))
+ } else {
+ writeLines(sprintf("Skipping %s export test", dbType))
+ }
+ })
+}
+
+
+ diff --git a/tests/testthat/test_viewResults_parameterized.R b/tests/testthat/test_viewResults_parameterized.R new file mode 100644 index 00000000..b2fc3731 --- /dev/null +++ b/tests/testthat/test_viewResults_parameterized.R @@ -0,0 +1,39 @@
+#Requires that Achilles has been run first
+
+
+dbTypes = c("mysql",
+ "oracle",
+ "postgresql",
+ "redshift",
+ "sql server",
+ "pdw",
+ "netezza",
+ "bigquery")
+
+for (dbType in dbTypes)
+{
+ test_that(sprintf("fetchAchillesAnalysisResults does not throw an error on %s", dbType), {
+ sysUser <- Sys.getenv(sprintf("CDM5_%s_USER", toupper(dbType)))
+ sysPassword <- URLdecode(Sys.getenv(sprintf("CDM5_%s_PASSWORD", toupper(dbType))))
+ sysServer <- Sys.getenv(sprintf("CDM5_%s_SERVER", toupper(dbType)))
+ sysExtraSettings <- Sys.getenv(sprintf("CDM5_%s_EXTRA_SETTINGS", toupper(dbType)))
+ if (sysUser != "" &
+ sysPassword != "" &
+ sysServer != "") {
+ cdmDatabaseSchema <- Sys.getenv(sprintf("CDM5_%s_CDM_SCHEMA", toupper(dbType)))
+ resultsDatabaseSchema <- Sys.getenv(sprintf("CDM5_%s_OHDSI_SCHEMA", toupper(dbType)))
+
+ details <- createConnectionDetails(dbms = dbType,
+ user = sysUser,
+ password = sysPassword,
+ server = sysServer,
+ extraSettings = sysExtraSettings)
+
+ fetchAchillesAnalysisResults(connectionDetails = details, resultsDatabaseSchema = resultsDatabaseSchema, analysisId = 106)
+
+ for (analysisId in analysesDetails$ANALYSIS_ID) {
+ results <- fetchAchillesAnalysisResults(details, resultsDatabaseSchema, analysisId = analysisId)
+ }
+ }
+ })
+} \ No newline at end of file
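For orientation, the sketch below strings together the main entry points touched by this changeset in the order a site would typically run them. It is an editor's illustration, not code from the repository: function and argument names follow the diffed documentation and tests, while the connection settings, schema names, and thread count are placeholders.

library(Achilles)

connectionDetails <- DatabaseConnector::createConnectionDetails(dbms = "postgresql",
                                                                server = "some_server/ohdsi",
                                                                user = "user",
                                                                password = "secret")

# Optional sanity check of the CDM structure before computing anything
validateSchema(connectionDetails,
               cdmDatabaseSchema = "cdm",
               cdmVersion = "5.3",
               runCostAnalysis = FALSE)

# Multi-threaded Achilles run, writing intermediate tables to a dedicated scratch schema
achilles(connectionDetails,
         cdmDatabaseSchema = "cdm",
         resultsDatabaseSchema = "results",
         scratchDatabaseSchema = "scratch",
         sourceName = "My CDM",
         cdmVersion = "5",
         numThreads = 5,
         dropScratchTables = FALSE)

# Heel data quality checks against the pre-computed analyses
achillesHeel(connectionDetails,
             cdmDatabaseSchema = "cdm",
             resultsDatabaseSchema = "results",
             scratchDatabaseSchema = "scratch",
             cdmVersion = "5",
             numThreads = 5)

# Clean up any leftover scratch tables from both steps
dropAllScratchTables(connectionDetails,
                     scratchDatabaseSchema = "scratch",
                     numThreads = 5,
                     tableTypes = c("achilles", "heel"))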