Skip to content

Commit

Permalink
Properly test the quarterly level summaries
Browse files Browse the repository at this point in the history
  • Loading branch information
milanmlft committed Nov 20, 2024
1 parent f158e9d commit 1f0a16b
Show file tree
Hide file tree
Showing 2 changed files with 53 additions and 14 deletions.
5 changes: 3 additions & 2 deletions preprocessing/R/monthly_counts.R
Original file line number Diff line number Diff line change
Expand Up @@ -98,8 +98,9 @@ summarise_counts <- function(omop_table, concept_col, date_col, level) {
)

if (level == "quarterly") {
# NOTE: lubridate::quarter is supported for postgres back-ends, but not sqlite
omop_table$date_quarter <- as.integer(lubridate::quarter(.data[[date_col]]))
# NOTE: lubridate::quarter is not supported for all SQL back-ends
omop_table <- omop_table |>
dplyr::mutate(date_quarter = as.integer(lubridate::quarter(.data[[date_col]])))
}

omop_table |>
Expand Down
62 changes: 50 additions & 12 deletions preprocessing/tests/testthat/test-monthly_counts.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,20 @@ test_that("generate_monthly_counts works on a CDM object", {
))
})

test_that("generate_monthly_counts can generate quarterly counts from CDM object", {
quarterly_counts <- generate_monthly_counts(mock_cdm, threshold = 0, replacement = 0, level = "quarterly")
expect_s3_class(quarterly_counts, "data.frame")
expect_true(nrow(quarterly_counts) > 0)
expect_named(quarterly_counts, c(
"concept_id", "concept_name", "date_year", "date_quarter", "record_count",
"person_count", "records_per_person"
))

## Sanity check date_quarter
expect_type(quarterly_counts$date_quarter, "integer")
expect_true(all(quarterly_counts$date_quarter >= 1 & quarterly_counts$date_quarter <= 4))
})

test_that("generate_monthly_counts replaces low-frequency values", {
threshold <- 5
replacement <- 0.5
Expand All @@ -20,26 +34,50 @@ test_that("generate_monthly_counts replaces low-frequency values", {

## Set up a mock measurement OMOP table
## Measurements for 3 different patients on the same day, with 1 patient having 2 measurements
mock_measurement <- data.frame(
measurement_id = 1:4,
person_id = c(1, 1, 2, 3),
measurement_type_concept_id = 12345,
measurement_concept_id = 1,
measurement_date = "2020-01-01",
value_as_number = c(2, 1, 2, 1),
value_as_concept_id = 0
)
generate_mock_measurements <- function(dates, n_persons) {
grid <- expand.grid(measurement_date = as.Date(dates), person_id = seq_len(n_persons))
data.frame(
grid,
measurement_id = seq_len(nrow(grid)),
measurement_type_concept_id = 12345,
measurement_concept_id = 1,
value_as_number = 0,
value_as_concept_id = 0
)
}
mock_measurement <- generate_mock_measurements("2020-01-01", 3)

test_that("summarise_counts produces the expected results at monthly level", {
res <- summarise_counts(mock_measurement, "measurement_concept_id", "measurement_date", level = "monthly")

expect_s3_class(res, "data.frame")
expect_named(res, c("concept_id", "date_year", "date_month", "record_count", "person_count", "records_per_person"))
expect_equal(nrow(res), 1)

expect_equal(res$record_count, 3)
expect_equal(res$person_count, 3)
expect_equal(res$records_per_person, 4 / 3)
expect_equal(res$records_per_person, 1)
})

con <- duckdb::dbConnect(duckdb::duckdb())
test_that("summarise_counts produces the expected results at quarterly level", {
mock_measurement <- generate_mock_measurements(
dates = c("2012-03-26", "2012-05-04", "2012-09-23", "2012-12-31"),
n_persons = 3
)
res <- summarise_counts(mock_measurement, "measurement_concept_id", "measurement_date", level = "quarterly")

expect_s3_class(res, "data.frame")
expect_named(res, c("concept_id", "date_year", "date_quarter", "record_count", "person_count", "records_per_person"))
expect_equal(nrow(res), 4)

expect_equal(res$date_quarter, c(1, 2, 3, 4))

expect_equal(res$person_count, rep(3, 4))
expect_equal(res$records_per_person, rep(1, 4))
expect_equal(res$record_count, rep(3, 4))
})

con <- connect_to_db(duckdb::duckdb())
duckdb::duckdb_register(con, "measurement", mock_measurement)
db_measurement <- dplyr::tbl(con, "measurement")
test_that("summarise_counts works on Database-stored tables at monthly level", {
Expand All @@ -59,7 +97,7 @@ test_that("summarise_counts works on Database-stored tables at quarterly level",
db_res <- summarise_counts(db_measurement, "measurement_concept_id", "measurement_date", level = "quarterly")

expect_s3_class(db_res, "data.frame")
expect_named(db_res, c("concept_id", "date_year", "date_month", "record_count", "person_count", "records_per_person"))
expect_named(db_res, c("concept_id", "date_year", "date_quarter", "record_count", "person_count", "records_per_person"))
expect_type(db_res$record_count, "integer")
expect_type(db_res$person_count, "integer")
expect_type(db_res$records_per_person, "double")
Expand Down

0 comments on commit 1f0a16b

Please sign in to comment.