Skip to content

Commit

Permalink
Merge pull request #161 from RSGInc/bug_fixes_06_2024
Browse files Browse the repository at this point in the history
Bug fixes 06 2024
  • Loading branch information
james-clark-rsg authored Jun 20, 2024
2 parents 1f3ff6b + 360131c commit bf48868
Show file tree
Hide file tree
Showing 282 changed files with 1,175 additions and 902 deletions.
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
# Session Data files
.RData
.RDataTmp
.Rproj
*.Rproj

# User-specific files
.Ruserdata
Expand Down
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: travelSurveyTools
Title: travelSurveyTools
Version: 2.4.2
Version: 2.4.3
Authors@R: c(
person("RSG", "Inc.", , "[email protected]", role = c("aut", "cre")),
person("Ashley", "Asmus", , "[email protected]", role = "aut"),
Expand Down
6 changes: 6 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
# travelSurveyTools 2.4.3

- Resolve bugs and updated function syntax in `hts_prep_triprate` and `hts_prep_variable`
- Updated `remove_outliers` parameter to default to FALSE in `hts_prep_triprate`
- Added a warning message in `hts_remove_outliers`

# travelSurveyTools 2.4.2

- Add summarize_var and summarize_by to output. Select weight automatically in `hts_summary_wrapper`
Expand Down
256 changes: 131 additions & 125 deletions R/hts_prep_triprate.R
Original file line number Diff line number Diff line change
Expand Up @@ -55,10 +55,16 @@ hts_prep_triprate = function(summarize_by = NULL,
day_name = "day",
ids = c("hh_id", "person_id", "day_id", "trip_id", "vehicle_id"),
wts = c("hh_weight", "person_weight", "day_weight", "trip_weight", "hh_weight"),
remove_outliers = TRUE,
remove_outliers = FALSE,
threshold = 0.975,
weighted = TRUE,
hts_data) {
hts_data = list(
"hh" = hh,
"person" = person,
"day" = day,
"trip" = trip,
"vehicle" = vehicle)) {

# Check variable_list first
variables_dt = hts_validate_variable_list(variables_dt, hts_data)

Expand Down Expand Up @@ -135,151 +141,151 @@ hts_prep_triprate = function(summarize_by = NULL,
shared_name %in% summarize_by &
get(day_name) == 1 &
get(trip_name) == 1, shared_name]

if (length(day_trip_vars) > 0) {

setnames(variables_dt, trip_name, 'trip_table')

variables_dt[shared_name %in% day_trip_vars, trip_table := 0]

setnames(variables_dt, 'trip_table', trip_name)

}

if (length(summarize_by) > 0) {
byvar_dt = hts_prep_byvar(summarize_by,
variables_dt = variables_dt,
hts_data = hts_data,
byvar_ids = ids,
byvar_wts = wts
)

merge_cols = names(byvar_dt)[names(byvar_dt) %in% names(trip_control)]

triprate_dt = merge(trip_control, byvar_dt, by = merge_cols, all.x = TRUE)

triprate_cols = intersect(names(triprate_dt), c(ids, wts))

triprate_cols = triprate_cols[!triprate_cols %in% c(trip_id, trip_wt)]

triprate_cols_all = c(triprate_cols, summarize_by)

if (weighted) {
triprate_dt = triprate_dt[, .(num_trips = sum(get(trip_wt))),
by = triprate_cols_all
]
}

if (!weighted) {
triprate_dt = triprate_dt[, .(num_trips = sum(!is.na(get(trip_id)))),
by = triprate_cols_all
]
if (length(day_trip_vars) > 0) {

setnames(variables_dt, trip_name, 'trip_table')

variables_dt[shared_name %in% day_trip_vars, trip_table := 0]

setnames(variables_dt, 'trip_table', trip_name)

}

# fill in with zeros for zero trips on a given day:
triprate_dt[, `:=`(
num_trips = nafill(num_trips, fill = 0)
)]

# If one of the by-variables is in trip table, need to expand to
# include all levels of the variable for every trip, and fill with zeros:
if (trip_id %in% names(byvar_dt)) {
# fill in with zeros for zero trips for a given level of xt_var using dcast:
dcast_formula =
paste0(
paste0(triprate_cols, collapse = " + "),
" ~ ",
paste0(summarize_by, collapse = " + ")
)

triprate_cast = dcast(triprate_dt,
dcast_formula,
value.var = "num_trips",
fill = 0
if (length(summarize_by) > 0) {
byvar_dt = hts_prep_byvar(summarize_by,
variables_dt = variables_dt,
hts_data = hts_data,
byvar_ids = ids,
byvar_wts = wts
)

# Remove columns where NA levels of factors were generated during dcast:
na_filled_cols = names(triprate_cast)[names(triprate_cast) %like% "_NA"]
merge_cols = names(byvar_dt)[names(byvar_dt) %in% names(trip_control)]

if (length(na_filled_cols) > 0) {
triprate_cast[, c(na_filled_cols) := NULL]
}
triprate_dt = merge(trip_control, byvar_dt, by = merge_cols, all.x = TRUE)

# transform back to long format, with separate cols for weighted & unwt. trip rates:
triprate_dt = data.table::melt(
triprate_cast,
id.vars = triprate_cols,
value.name = "num_trips"
)
triprate_cols = intersect(names(triprate_dt), c(ids, wts))

triprate_cols = triprate_cols[!triprate_cols %in% c(trip_id, trip_wt)]

triprate_cols_all = c(triprate_cols, summarize_by)

# Relabel xtab trip vars after melting:
if (length(summarize_by) > 1) {
triprate_dt[, c(summarize_by) := tstrsplit(variable, "_")]
triprate_dt[, variable := NULL]
if (weighted) {
triprate_dt = triprate_dt[, .(num_trips = sum(get(trip_wt))),
by = triprate_cols_all
]
}

if (length(summarize_by) == 1) {
setnames(triprate_dt, old = "variable", new = summarize_by)
if (!weighted) {
triprate_dt = triprate_dt[, .(num_trips = sum(!is.na(get(..trip_id)))),
by = triprate_cols_all
]
}

triprate_dt = triprate_dt[]
# fill in with zeros for zero trips on a given day:
triprate_dt[, `:=`(
num_trips = nafill(num_trips, fill = 0)
)]

# If one of the by-variables is in trip table, need to expand to
# include all levels of the variable for every trip, and fill with zeros:
if (trip_id %in% names(byvar_dt)) {
# fill in with zeros for zero trips for a given level of xt_var using dcast:
dcast_formula =
paste0(
paste0(triprate_cols, collapse = " + "),
" ~ ",
paste0(summarize_by, collapse = " + ")
)

triprate_cast = dcast(triprate_dt,
dcast_formula,
value.var = "num_trips",
fill = 0
)

# Remove columns where NA levels of factors were generated during dcast:
na_filled_cols = names(triprate_cast)[names(triprate_cast) %like% "_NA" | names(triprate_cast) == "NA"]

if (length(na_filled_cols) > 0) {
triprate_cast[, c(na_filled_cols) := NULL]
}

# transform back to long format, with separate cols for weighted & unwt. trip rates:
triprate_dt = data.table::melt(
triprate_cast,
id.vars = triprate_cols,
value.name = "num_trips"
)

# Relabel xtab trip vars after melting:
if (length(summarize_by) > 1) {
triprate_dt[, c(summarize_by) := tstrsplit(variable, "_")]
triprate_dt[, variable := NULL]
}

if (length(summarize_by) == 1) {
setnames(triprate_dt, old = "variable", new = summarize_by)
}

triprate_dt = triprate_dt[]
}

if (weighted) {
# calculate trip rate
triprate_dt[, trip_rate :=
ifelse(num_trips == 0, 0, num_trips / get(day_wt))]

# Save counts of trips under a different name
setnames(triprate_dt, "num_trips", "trip_count_wtd")

setnames(triprate_dt, "trip_rate", "num_trips")
}
}

if (weighted) {
# calculate trip rate
triprate_dt[, trip_rate :=
ifelse(num_trips == 0, 0, num_trips / get(day_wt))]
# remove outliers
if (remove_outliers) {
out = hts_remove_outliers(triprate_dt,
numvar = "num_trips",
threshold = threshold
)

# Save counts of trips under a different name
setnames(triprate_dt, "num_trips", "trip_count_wtd")
triprate_dt = out[["dt"]]

setnames(triprate_dt, "trip_rate", "num_trips")
outlier_table = out[["outlier_description"]]
}
}

# remove outliers
if (remove_outliers) {
out = hts_remove_outliers(triprate_dt,
numvar = "num_trips",
threshold = threshold
)

triprate_dt = out[["dt"]]
# Bin trips:
triprate_binned = hts_bin_var(
prepped_dt = triprate_dt,
numvar = "num_trips",
nbins = 7
)

outlier_table = out[["outlier_description"]]
}

# Bin trips:
triprate_binned = hts_bin_var(
prepped_dt = triprate_dt,
numvar = "num_trips",
nbins = 7
)

if (weighted) {
setnames(triprate_dt, "num_trips", "num_trips_wtd", skip_absent = TRUE)
setnames(triprate_binned, "num_trips", "num_trips_wtd", skip_absent = TRUE)
} else {
setnames(triprate_dt, "num_trips", "num_trips_unwtd", skip_absent = TRUE)
setnames(triprate_binned, "num_trips", "num_trips_unwtd", skip_absent = TRUE)
}
prepped_dt_ls = list(
"num" = triprate_dt,
"cat" = triprate_binned
)

# Append outliers:
if (remove_outliers) {
if (weighted) {
setnames(triprate_dt, "num_trips", "num_trips_wtd", skip_absent = TRUE)
setnames(triprate_binned, "num_trips", "num_trips_wtd", skip_absent = TRUE)
} else {
setnames(triprate_dt, "num_trips", "num_trips_unwtd", skip_absent = TRUE)
setnames(triprate_binned, "num_trips", "num_trips_unwtd", skip_absent = TRUE)
}
prepped_dt_ls = list(
"cat" = triprate_binned,
"num" = triprate_dt,
"outliers" = outlier_table
"cat" = triprate_binned
)
}


return(prepped_dt_ls)

# Append outliers:
if (remove_outliers) {
prepped_dt_ls = list(
"cat" = triprate_binned,
"num" = triprate_dt,
"outliers" = outlier_table
)
}


return(prepped_dt_ls)
}

## quiets concerns of R CMD check
utils::globalVariables(c("trip_weight", "num_trips", "trip_rate", "day_weight", "trip_table"))
utils::globalVariables(c("..trip_id","trip_weight", "num_trips", "trip_rate", "day_weight", "trip_table"))
Loading

0 comments on commit bf48868

Please sign in to comment.