Skip to content

Commit

Permalink
fix: Use better low-frequency replacement values (#115)
Browse files Browse the repository at this point in the history
Use `9.99` as the replacement value for both the public and internal versions, setting the threshold at `10` for both. Also use better rounding for nicer display of results.
  • Loading branch information
milanmlft authored Dec 4, 2024
1 parent a1e6e0d commit c0a2d8c
Show file tree
Hide file tree
Showing 10 changed files with 19 additions and 12 deletions.
4 changes: 2 additions & 2 deletions .env.sample
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@ PREPROCESS_DB_CDM_SCHEMA= # Schema name in the database to connect the
PREPROCESS_SUMMARISE_LEVEL=monthly # Level to summarise record counts at (monthly or quarterly)

# Low-frequency replacement
LOW_FREQUENCY_THRESHOLD=5
LOW_FREQUENCY_REPLACEMENT=2.5
LOW_FREQUENCY_THRESHOLD=10
LOW_FREQUENCY_REPLACEMENT=9.999999

# For testing
TEST_DB_PATH=./data-raw/test_db/eunomia
21 changes: 15 additions & 6 deletions app/R/mod_datatable.R
Original file line number Diff line number Diff line change
Expand Up @@ -64,12 +64,21 @@ mod_datatable_server <- function(id, selected_dates, bundle_concepts) {

moduleServer(id, function(input, output, session) {
concepts_with_counts <- reactive({
low_freq_threshold <- as.numeric(Sys.getenv("LOW_FREQUENCY_THRESHOLD"))

join_counts_to_concepts(all_concepts, monthly_counts, selected_dates()) |>
# Reorder and select the columns we want to display
dplyr::select(
"concept_id", "concept_name",
"total_records", "mean_persons",
"domain_id", "vocabulary_id", "concept_class_id"
) |>
# Conditionally round numbers for better display
dplyr::mutate(
dplyr::across(
dplyr::where(is.double),
function(x) ifelse(x > low_freq_threshold, round(x), round(x, 2))
)
)
})
output$datatable <- DT::renderDT(concepts_with_counts(),
Expand All @@ -78,8 +87,8 @@ mod_datatable_server <- function(id, selected_dates, bundle_concepts) {
colnames = c(
"ID" = "concept_id",
"Name" = "concept_name",
"Records" = "total_records",
"Patients" = "mean_persons",
"Total Records" = "total_records",
"Average Patients" = "mean_persons",
"Domain ID" = "domain_id",
"Vocabulary ID" = "vocabulary_id",
"Concept Class ID" = "concept_class_id"
Expand Down Expand Up @@ -111,10 +120,10 @@ join_counts_to_concepts <- function(concepts, monthly_counts, selected_dates) {
filter_dates(selected_dates) |>
dplyr::group_by(.data$concept_id) |>
dplyr::summarise(
# round to avoid decimal values in in total_records because of low-req replacement
total_records = sum(round(.data$record_count)),
mean_persons = round(mean(.data$person_count, na.rm = TRUE), 2),
mean_records_per_person = round(mean(.data$records_per_person, na.rm = TRUE), 2)
total_records = sum(.data$record_count),
# Note that we can only calculate the average number of persons per month here
# as we cannot identify unique patients across months
mean_persons = mean(.data$person_count, na.rm = TRUE),
)
# Use inner_join so we only keep concepts for which we have counts in the selected dates
dplyr::inner_join(concepts, summarised_counts, by = "concept_id")
Expand Down
4 changes: 1 addition & 3 deletions app/tests/testthat/test-mod_datatable.R
Original file line number Diff line number Diff line change
Expand Up @@ -66,13 +66,12 @@ test_that("Adding records and patients counts to concepts table works", {
concepts_with_counts <- join_counts_to_concepts(mock_selection_data, mock_monthly_counts, selected_dates)

expect_in(
c("concept_id", "concept_name", "total_records", "mean_persons", "mean_records_per_person"),
c("concept_id", "concept_name", "total_records", "mean_persons"),
names(concepts_with_counts)
)
expect_equal(nrow(concepts_with_counts), 3)
expect_equal(concepts_with_counts$total_records, c(100, 200, 300))
expect_equal(concepts_with_counts$mean_persons, c(10, 10, 10))
expect_equal(concepts_with_counts$mean_records_per_person, c(10, 10, 10))
})

test_that("Added counts depends on selected dates", {
Expand All @@ -81,7 +80,6 @@ test_that("Added counts depends on selected dates", {

expect_equal(concepts_with_counts$total_records, c(100, 100, 100))
expect_equal(concepts_with_counts$mean_persons, c(10, 10, 10))
expect_equal(concepts_with_counts$mean_records_per_person, c(10, 10, 10))
})

test_that("Only concepts with data for the selected date range are kept", {
Expand Down
Binary file modified data/test_data/internal/omopcat_concepts.parquet
Binary file not shown.
Binary file modified data/test_data/internal/omopcat_monthly_counts.parquet
Binary file not shown.
Binary file modified data/test_data/internal/omopcat_summary_stats.parquet
Binary file not shown.
Binary file modified data/test_data/public/omopcat_concepts.parquet
Binary file not shown.
Binary file modified data/test_data/public/omopcat_monthly_counts.parquet
Binary file not shown.
Binary file modified data/test_data/public/omopcat_summary_stats.parquet
Binary file not shown.
2 changes: 1 addition & 1 deletion public.env.sample
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ PREPROCESS_SUMMARISE_LEVEL=quarterly # Level to summarise record counts at (mon

# Low-frequency replacement
LOW_FREQUENCY_THRESHOLD=10
LOW_FREQUENCY_REPLACEMENT=5
LOW_FREQUENCY_REPLACEMENT=9.99

# For testing
TEST_DB_PATH=./data-raw/test_db/eunomia

0 comments on commit c0a2d8c

Please sign in to comment.