Skip to content

Commit

Permalink
Merge pull request #109 from RSGInc/102-document-codebook-expected-ta…
Browse files Browse the repository at this point in the history
…ble-structure

add to other functions and alter documentation
  • Loading branch information
erika-redding authored Feb 9, 2024
2 parents 42eecde + 6b54f8f commit 7f70ceb
Show file tree
Hide file tree
Showing 8 changed files with 156 additions and 14 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ export(hts_summary_cat)
export(hts_summary_num)
export(hts_to_so)
export(hts_trip_vehid)
export(hts_validate_variable_list)
export(join_spatial)
import(data.table)
import(sf)
Expand Down
11 changes: 4 additions & 7 deletions R/data.R
Original file line number Diff line number Diff line change
Expand Up @@ -122,24 +122,21 @@

#' List of variables
#'
#' A dataset containing information about all variables existing in the hh, person,
#' day, trip, and vehicle tables. The variables are as follows:
#' A dataset containing information about all variables in data. This must contain one binary column indicating table location for each table in the data (ie., hh, person, day, trip, vehicle).
#'
#' @format ## `variable_list`
#' A data frame with 41 rows and 13 columns:
#' A data frame with 55 rows and 10 columns:
#' \describe{
#' \item{order}{The order the variables are presented in}
#' \item{source}{Where the variable was created}
#' \item{variable}{Name of the variable}
#' \item{is_checkbox}{The variable is a 'Select all that Apply' question}
#' \item{hh}{The variable exists in the hh table}
#' \item{person}{The variable exists in the person table}
#' \item{day}{The variable exists in the day table}
#' \item{trip}{The variable exists in the trip table}
#' \item{vehicle}{The variable exists in the vehicle table}
#' \item{data_type}{Data type of the variable}
#' \item{data_type}{Data type of the variable ("iteger/categorical", "numeric", "character")}
#' \item{description}{A description of the variable}
#' \item{logic}{Conditions where the variable should have a value}
#' \item{shared_name}{Shared part of name for checkbox variable (e.g., race_1 -> race) or variable name (e.g., age -> age)}
#' }
"variable_list"

Expand Down
3 changes: 3 additions & 0 deletions R/hts_prep_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,9 @@ hts_prep_data = function(summarize_var = NULL,
strataname = NULL) {
# tictoc::tic("Total Time")

# Check variable_list first
variables_dt = hts_validate_variable_list(variables_dt, data)

# Message:
msg_pt1 = paste0("Creating a summary of ",
hts_find_var(summarize_var, data = data, variables_dt = variables_dt), " ", summarize_var)
Expand Down
2 changes: 2 additions & 0 deletions R/hts_prep_triprate.R
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@ hts_prep_triprate = function(summarize_by = NULL,
threshold = 0.975,
weighted = TRUE,
hts_data) {
# Check variable_list first
variables_dt = hts_validate_variable_list(variables_dt, hts_data)

tripdat = hts_data[[trip_name]]
daydat = hts_data[[day_name]]
Expand Down
91 changes: 91 additions & 0 deletions R/hts_validate_variable_list.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
#' Check that variable list has required elements for hts_prep_data returns updated variable list or prompts error
#'
#' @param variable_list Name of the variable to summarize. Default is NULL
#' @param hts_data List of named tables specified in hts_prep_data
#'
#' @return Cleaned variable_list
#' @export
#'
#' @examples
#' hts_validate_variable_list(variable_list, test_data)
#'

hts_validate_variable_list = function(variable_list,
hts_data){


var_dt = copy(variable_list)

# Get expected variable names
tbl_names = names(hts_data)

admin_names = c('variable',
'is_checkbox',
'data_type',
'description',
'shared_name')

expected_names = c(admin_names, tbl_names)

# Stop if missing required columns
if (length(setdiff(expected_names, names(var_dt))) > 0){

stop(
'Columns are missing: ',
paste(setdiff(expected_names, names(var_dt)), collapse = ', '),
"See https://rsginc.github.io/travelSurveyTools/reference/variable_list.html for variable requirements."
)

}

# Check for missing values in admin names
missing_admin_counts = lapply(var_dt[, ..admin_names], function(x) sum(is.na(x)))

if (length(missing_admin_counts[missing_admin_counts != 0]) > 0){

stop(
'Missing values in: ',
paste(names(missing_admin_counts)[missing_admin_counts != 0], collapse = ', '),
"See https://rsginc.github.io/travelSurveyTools/reference/variable_list.html for variable requirements."
)

}

# Check that each variable is in at least one table
var_dt$tbl_count = rowSums(var_dt[, ..tbl_names])

if (var_dt[tbl_count == 0, .N]){

warning(
'Variable(s) do not have a location specified: ',
paste0(var_dt[tbl_count == 0, variable], collapse = ', ' )
)

}

# Check for duplicate variables
if (var_dt[, .N, variable][N > 1, .N] != 0){

stop('Duplicate variables appear in variable list')

}

# Check that checkbox variables are "integer/categorical"

not_categorical = var_dt[is_checkbox == 1 & data_type != 'integer/categorical', .N]

if(not_categorical > 0){

message('Editing ', not_categorical, ' categorical variables to "integer/categorical"')

}

var_dt = var_dt[, ..expected_names]

return(var_dt)

}

## quiets concerns of R CMD check
utils::globalVariables(c("..admin_names", "..tbl_names", "tbl_count", "N",
"..expected_names"))
23 changes: 23 additions & 0 deletions man/hts_validate_variable_list.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

11 changes: 4 additions & 7 deletions man/variable_list.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

28 changes: 28 additions & 0 deletions tests/testthat/test_hts_validate_variable_list.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@

# Load necessary libraries and setup environment
library(testthat)
library(data.table)

data("test_data")

test_that("hts_validate_variable_list should return an error", {

data("variable_list")

variable_list[, shared_name := NA]

expect_error(hts_validate_variable_list(variable_list, test_data))


})

test_that("hts_validate_variable_list should return a warning", {

data("variable_list")

variable_list[variable == 'age', person := 0]

expect_warning(hts_validate_variable_list(variable_list, test_data))


})

0 comments on commit 7f70ceb

Please sign in to comment.