diff --git a/NAMESPACE b/NAMESPACE index 780edcf..dfcd2b6 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -20,7 +20,7 @@ export(get_taxon_rank) export(get_utm_zone) export(long2UTM) export(order_cols) -export(removeEmptyTables) +export(remove_empty_tables) export(replace_blanks) export(te_check) export(utm_to_ll) diff --git a/NEWS.md b/NEWS.md index eda3964..f2e0e6c 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,10 @@ +# QCkit v0.1.8 (not yet released) + +2024-06-27 +* bug fixes for `generate_ll_from_utm()` +* add function `remove_empty_tables()` (and associated unit tests) +* update documentation for `replace_blanks()` to indicate it can replace blanks with more than just NA + # QCkit v0.1.7 2024-05-08 * Updated the `replace_blanks()` function to accept any missing value code a user inputs (but it still defaults to NA). diff --git a/R/geography.R b/R/geography.R index accdfd5..2b89c94 100644 --- a/R/geography.R +++ b/R/geography.R @@ -425,7 +425,11 @@ generate_ll_from_utm <- function(df, # Separate df with just coordinates. We'll filter out any NA rows. coord_df <- df %>% - dplyr::select(`_UTMJOINCOL`, {{EastingCol}}, {{NorthingCol}}, {{ZoneCol}}, {{DatumCol}}) + dplyr::select(`_UTMJOINCOL`, + {{EastingCol}}, + {{NorthingCol}}, + {{ZoneCol}}, + {{DatumCol}}) withr::with_envvar(c("PROJ_LIB" = ""), { # This is a fix for the proj library bug in R (see pinned post "sf::st_read() of geojson not getting CRS" in IMData General Discussion). coord_df <- coord_df %>% @@ -437,14 +441,19 @@ generate_ll_from_utm <- function(df, na_row_count <- nrow(df) - nrow(coord_df) if (na_row_count > 0) { - warning(paste(na_row_count, "rows are missing UTM coordinates, zone, and/or datum information."), call. = FALSE) + warning(paste( + na_row_count, + "rows are missing UTM coordinates, zone, and/or datum information."), + call. 
= FALSE) } ## Set up CRS for lat/long data - latlong_CRS <- sp::CRS(glue::glue("+proj=longlat +datum={latlong_datum}")) # CRS for our new lat/long values + latlong_CRS <- sp::CRS(glue::glue("+proj=longlat +datum={latlong_datum}")) + # CRS for our new lat/long values # Loop through each datum and zone in the data - zones_datums <- dplyr::select(coord_df, {{ZoneCol}}, {{DatumCol}}) %>% # Get vector of zones present in data + zones_datums <- dplyr::select(coord_df, {{ZoneCol}}, {{DatumCol}}) %>% + # Get vector of zones present in data unique() new_coords <- sapply(1:nrow(zones_datums), function(zone_datum_index) { @@ -454,7 +463,8 @@ generate_ll_from_utm <- function(df, zone_num <- current_zone north_south <- "" } else { - zone_num <- stringr::str_extract(current_zone, "\\d+") # sp::CRS wants zone number only, e.g. 11, not 11N + zone_num <- stringr::str_extract(current_zone, "\\d+") + # sp::CRS wants zone number only, e.g. 11, not 11N zone_letter <- tolower(stringr::str_extract(current_zone, "[A-Za-z]")) if (!is.na(zone_letter) && zone_letter == "s") { north_south <- " +south" @@ -469,17 +479,21 @@ generate_ll_from_utm <- function(df, filtered_df <- coord_df %>% dplyr::filter((!!rlang::ensym(ZoneCol) == current_zone & !!rlang::ensym(DatumCol) == current_datum)) sp_utm <- sp::SpatialPoints(filtered_df %>% - dplyr::select({{EastingCol}}, {{NorthingCol}}) %>% + dplyr::select({{EastingCol}}, + {{NorthingCol}}) %>% as.matrix(), - proj4string = utm_CRS) # Convert UTM columns into a SpatialPoints object - sp_geo <- sp::spTransform(sp_utm, latlong_CRS) %>% # Transform UTM to Lat/Long + proj4string = utm_CRS) + # Convert UTM columns into a SpatialPoints object + sp_geo <- sp::spTransform(sp_utm, latlong_CRS) %>% + # Transform UTM to Lat/Long tibble::as_tibble() # Add lat/long columns back into the original dataframe latlong <- tibble::tibble(`_UTMJOINCOL` = filtered_df$`_UTMJOINCOL`, decimalLatitude = sp_geo[[2]], decimalLongitude = sp_geo[[1]], - LatLong_CRS = 
latlong_CRS@projargs) # Store the coordinate reference system PROJ string in the dataframe + LatLong_CRS = latlong_CRS@projargs) + # Store the coordinate reference system PROJ string in the dataframe return(latlong) diff --git a/R/remove_empty_tables.R b/R/remove_empty_tables.R index 289ea6e..a042031 100644 --- a/R/remove_empty_tables.R +++ b/R/remove_empty_tables.R @@ -11,9 +11,9 @@ #' item_b = mtcars, #' item_c = iris) #' -#' tidy_list <- removeEmptyTables(test_list) +#' tidy_list <- remove_empty_tables(test_list) #' -removeEmptyTables <- function(df_list) { +remove_empty_tables <- function(df_list) { non_empty_list <- purrr::compact(df_list) # Remove empty dataframes tables_removed <- setdiff(names(df_list), names(non_empty_list)) # Get names of removed dataframes diff --git a/R/replace_blanks.R b/R/replace_blanks.R index 85813f7..3f028d2 100644 --- a/R/replace_blanks.R +++ b/R/replace_blanks.R @@ -1,7 +1,8 @@ -#' Replaces all blank cells with NA +#' Replaces all blank cells with a missing value code of your choice #' -#' @details `replace_blanks()` is particularly useful for exporting data from a -#' database (such as access) and converting it to a data package with metadata. +#' @description `replace_blanks()` is particularly useful for exporting data +#' from a database (such as access) and converting it to a data package with +#' metadata. #' #' `replace_blanks()` will import all .csv files in the specified working #' directory. The files are then written back out to the same directory, @@ -10,9 +11,23 @@ #' If no missing value is specified, the function defaults to replacing all #' blanks with "NA". #' -#' One exception is if a .csv contains NO data (i.e. just column names and no -#' data in any of the cells). In this case, the blanks will not be replaced with -#' NA (as the function cannot determine how many NAs to include). +#' Please keep in mind that "missing" is a general term for all data +#' not present in the data file or data package. 
Although you may have a very +#' good reason for not providing data and that data may not, from the data +#' package creator's perspective, be "missing" (maybe you never intended to +#' collect it) from a data package user's perspective any data that is not in +#' the data package is effectively "missing" from the data package. Therefore, +#' it is critical to document in metadata any data that are absent with an +#' appropriate "missingValueCode" and "missingValueDefinition". These terms are +#' defined by the metadata schema and are broadly used to apply to any data not +#' present. +#' +#' This function will replace all empty cells and all cells with NA with a +#' "missingValueCode" of your choice (although it defaults to NA). +#' +#' @details One exception is if a .csv contains NO data (i.e. just column names +#' and no data in any of the cells). In this case, the blanks will not be +#' replaced with NA (as the function cannot determine how many NAs to include). #' #' @param directory String. Path to the file(s) to have blanks replaced with #' NAs. Defaults to the working directory of the project (here::here()) diff --git a/docs/news/index.html b/docs/news/index.html index 53daa97..af13b37 100644 --- a/docs/news/index.html +++ b/docs/news/index.html @@ -3,7 +3,7 @@
- +NEWS.md
2024-06-27 * bug fixes for generate_ll_from_utm()
* add function remove_empty_tables()
(and associated unit tests) * update documentation for replace_blanks()
to indicate it can replace blanks with more than just NA
2024-05-08 * Updated the replace_blanks()
function to accept any missing value code a user inputs (but it still defaults to NA). 2024-04-18 * Added the function generate_ll_from_utm()
which supersedes convert_utm_to_ll()
and improves upon it in several ways, included accepting a column of UTMs and also returns a column of CRS along with the decimal degrees latitude and longitude. 2024-04-17 * Major updates to the DRR template including: using snake case instead of camel case for variables; updating Table 3 to only display filenames only when there are multiple files, fixed multiple issues with footnotes, added citations to NPSdataverse packages, added a section that prints the R code needed to download the data package and load it in to R. * Updated the DRR documentation to account for new variable names.