Skip to content

Commit

Permalink
improve block2prec robustness
Browse files Browse the repository at this point in the history
  • Loading branch information
christopherkenny committed Jun 30, 2024
1 parent 8238e02 commit 86a714c
Show file tree
Hide file tree
Showing 3 changed files with 41 additions and 52 deletions.
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
# geomander 2.4.0

* Replaces magrittr pipe with the base R pipe internally and in examples.
* Improves handling of columns within `block2prec()`.
* Adds an attribute to `geo_match()` when the last precinct in `to` is not matched. This can later be used when filling missing precincts (the implicit 0s).


# geomander 2.3.0
Expand Down
85 changes: 34 additions & 51 deletions R/datatables.R
Original file line number Diff line number Diff line change
Expand Up @@ -130,8 +130,14 @@ block2prec <- function(block_table, matches, geometry = FALSE) {
if (missing(matches)) {
cli::cli_abort('Please provide an argument to {.arg matches}.')
}

nrow_to <- attr(matches, 'matching_max')
if (is.null(nrow_to)) {
nrow_to <- max(matches)
}

block_table <- block_table |> mutate(matches_id = matches)
block_table <- block_table |>
dplyr::mutate(matches_id = matches)

if (!geometry) {
ret <- block_table |>
Expand All @@ -144,29 +150,41 @@ block2prec <- function(block_table, matches, geometry = FALSE) {
} else {
ret <- block_table |>
dplyr::group_by(matches_id) |>
dplyr::summarize(dplyr::across(where(is.numeric), sum),
dplyr::summarize(
dplyr::across(where(is.numeric), sum),
dplyr::across(where(function(x) length(unique(x)) == 1), unique),
geometry = sf::st_union(geometry),
.groups = 'drop'
) |>
sf::st_as_sf()
}

ret <- ret |> arrange(matches_id)
missed <- c()
if (nrow(ret) != max(matches)) {
for (i in 1:max(matches)) {
if (ret$matches_id[i] != i) {
ret <- ret |> add_row(
matches_id = i,
.after = (i - 1)
)
missed <- c(missed, i)
if (nrow(ret) != nrow_to) {
cols_to_fill <- lapply(ret, function(x) {
if (is.numeric(x)) {
if (all(x >= 0)) {
# assume counts
0L
} else {
# don't fill when there are negative values
NA_integer_
}
} else if (length(unique(x)) == 1) {
unique(x)
} else {
NA
}
}
ret <- update_tb(ret, missed)
}) |>
purrr::set_names(names(ret)) |>
purrr::discard(function(x) is.na(x))

ret <- tidyr::complete(
data = ret, matches_id = seq_len(nrow_to),
fill = cols_to_fill,
explicit = FALSE
)
}

ret
}

Expand Down Expand Up @@ -219,7 +237,7 @@ block2prec_by_county <- function(block_table, precinct, precinct_county_fips, ep
prectb <- tibble()
countiesl <- unique(block_table$county)

for (cty in 1:length(countiesl)) {
for (cty in seq_along(countiesl)) {
bsub <- block_table |> filter(.data$county == countiesl[cty])
psub <- precinct |>
filter(.data[[precinct_county_fips]] == countiesl[cty]) |>
Expand All @@ -243,41 +261,6 @@ block2prec_by_county <- function(block_table, precinct, precinct_county_fips, ep
}


update_tb <- function(ret, missed) {
expected <- tibble::tibble(
matches_id = missed,
state = ifelse(length(unique(ret$state, na.rm = TRUE)) == 1, unique(ret$state, na.rm = TRUE), NA),
county = ifelse(length(unique(ret$county, na.rm = TRUE)) == 1, unique(ret$county, na.rm = TRUE), NA),
pop = 0,
pop_white = 0,
pop_black = 0,
pop_hisp = 0,
pop_aian = 0,
pop_asian = 0,
pop_nhpi = 0,
pop_other = 0,
pop_two = 0,
vap = 0,
vap_hisp = 0,
vap_white = 0,
vap_black = 0,
vap_aian = 0,
vap_asian = 0,
vap_nhpi = 0,
vap_other = 0,
vap_two = 0
)

expected <- expected |>
dplyr::select(dplyr::any_of(base::intersect(names(expected), names(ret))))

if (ncol(expected) == 0) {
return(ret)
}

dplyr::rows_patch(x = ret, y = expected, by = 'matches_id')
}

globalVariables(c(
'GEOID', 'variable', 'value', 'AWATER10', 'ALAND10', 'County',
'State', 'pop', 'pop_black', 'pop_hisp', 'pop_other',
Expand Down
6 changes: 5 additions & 1 deletion R/match.R
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,11 @@ geo_match <- function(from, to, method = 'center', by = NULL, tiebreaker = TRUE,
}
}

as.integer(ints)
ints <- as.integer(ints)
if (max(ints) != nrow(to)) {
attr(ints, 'matching_max') <- nrow(to)
}
ints
}

globalVariables(c('fromid', 'toid', 'area'))

0 comments on commit 86a714c

Please sign in to comment.