diff --git a/NAMESPACE b/NAMESPACE index 780edcf..dfcd2b6 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -20,7 +20,7 @@ export(get_taxon_rank) export(get_utm_zone) export(long2UTM) export(order_cols) -export(removeEmptyTables) +export(remove_empty_tables) export(replace_blanks) export(te_check) export(utm_to_ll) diff --git a/NEWS.md b/NEWS.md index eda3964..f2e0e6c 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,10 @@ +# QCkit v0.1.8 (not yet released) + +2024-06-27 +* bug fixes for `generate_ll_from_utm()` +* add function `remove_empty_tables()` (and associated unit tests) +* update documentation for `replace_blanks()` to indicate it can replace blanks with more than just NA + # QCkit v0.1.7 2024-05-08 * Updated the `replace_blanks()` function to accept any missing value code a user inputs (but it still defaults to NA). diff --git a/R/geography.R b/R/geography.R index accdfd5..2b89c94 100644 --- a/R/geography.R +++ b/R/geography.R @@ -425,7 +425,11 @@ generate_ll_from_utm <- function(df, # Separate df with just coordinates. We'll filter out any NA rows. coord_df <- df %>% - dplyr::select(`_UTMJOINCOL`, {{EastingCol}}, {{NorthingCol}}, {{ZoneCol}}, {{DatumCol}}) + dplyr::select(`_UTMJOINCOL`, + {{EastingCol}}, + {{NorthingCol}}, + {{ZoneCol}}, + {{DatumCol}}) withr::with_envvar(c("PROJ_LIB" = ""), { # This is a fix for the proj library bug in R (see pinned post "sf::st_read() of geojson not getting CRS" in IMData General Discussion). coord_df <- coord_df %>% @@ -437,14 +441,19 @@ generate_ll_from_utm <- function(df, na_row_count <- nrow(df) - nrow(coord_df) if (na_row_count > 0) { - warning(paste(na_row_count, "rows are missing UTM coordinates, zone, and/or datum information."), call. = FALSE) + warning(paste( + na_row_count, + "rows are missing UTM coordinates, zone, and/or datum information."), + call.
= FALSE) } ## Set up CRS for lat/long data - latlong_CRS <- sp::CRS(glue::glue("+proj=longlat +datum={latlong_datum}")) # CRS for our new lat/long values + latlong_CRS <- sp::CRS(glue::glue("+proj=longlat +datum={latlong_datum}")) + # CRS for our new lat/long values # Loop through each datum and zone in the data - zones_datums <- dplyr::select(coord_df, {{ZoneCol}}, {{DatumCol}}) %>% # Get vector of zones present in data + zones_datums <- dplyr::select(coord_df, {{ZoneCol}}, {{DatumCol}}) %>% + # Get vector of zones present in data unique() new_coords <- sapply(1:nrow(zones_datums), function(zone_datum_index) { @@ -454,7 +463,8 @@ generate_ll_from_utm <- function(df, zone_num <- current_zone north_south <- "" } else { - zone_num <- stringr::str_extract(current_zone, "\\d+") # sp::CRS wants zone number only, e.g. 11, not 11N + zone_num <- stringr::str_extract(current_zone, "\\d+") + # sp::CRS wants zone number only, e.g. 11, not 11N zone_letter <- tolower(stringr::str_extract(current_zone, "[A-Za-z]")) if (!is.na(zone_letter) && zone_letter == "s") { north_south <- " +south" @@ -469,17 +479,21 @@ generate_ll_from_utm <- function(df, filtered_df <- coord_df %>% dplyr::filter((!!rlang::ensym(ZoneCol) == current_zone & !!rlang::ensym(DatumCol) == current_datum)) sp_utm <- sp::SpatialPoints(filtered_df %>% - dplyr::select({{EastingCol}}, {{NorthingCol}}) %>% + dplyr::select({{EastingCol}}, + {{NorthingCol}}) %>% as.matrix(), - proj4string = utm_CRS) # Convert UTM columns into a SpatialPoints object - sp_geo <- sp::spTransform(sp_utm, latlong_CRS) %>% # Transform UTM to Lat/Long + proj4string = utm_CRS) + # Convert UTM columns into a SpatialPoints object + sp_geo <- sp::spTransform(sp_utm, latlong_CRS) %>% + # Transform UTM to Lat/Long tibble::as_tibble() # Add lat/long columns back into the original dataframe latlong <- tibble::tibble(`_UTMJOINCOL` = filtered_df$`_UTMJOINCOL`, decimalLatitude = sp_geo[[2]], decimalLongitude = sp_geo[[1]], - LatLong_CRS = 
latlong_CRS@projargs) # Store the coordinate reference system PROJ string in the dataframe + LatLong_CRS = latlong_CRS@projargs) + # Store the coordinate reference system PROJ string in the dataframe return(latlong) diff --git a/R/remove_empty_tables.R b/R/remove_empty_tables.R index 289ea6e..a042031 100644 --- a/R/remove_empty_tables.R +++ b/R/remove_empty_tables.R @@ -11,9 +11,9 @@ #' item_b = mtcars, #' item_c = iris) #' -#' tidy_list <- removeEmptyTables(test_list) +#' tidy_list <- remove_empty_tables(test_list) #' -removeEmptyTables <- function(df_list) { +remove_empty_tables <- function(df_list) { non_empty_list <- purrr::compact(df_list) # Remove empty dataframes tables_removed <- setdiff(names(df_list), names(non_empty_list)) # Get names of removed dataframes diff --git a/R/replace_blanks.R b/R/replace_blanks.R index 85813f7..3f028d2 100644 --- a/R/replace_blanks.R +++ b/R/replace_blanks.R @@ -1,7 +1,8 @@ -#' Replaces all blank cells with NA +#' Replaces all blank cells with a missing value code of your choice #' -#' @details `replace_blanks()` is particularly useful for exporting data from a -#' database (such as access) and converting it to a data package with metadata. +#' @description `replace_blanks()` is particularly useful for exporting data +#' from a database (such as access) and converting it to a data package with +#' metadata. #' #' `replace_blanks()` will import all .csv files in the specified working #' directory. The files are then written back out to the same directory, @@ -10,9 +11,23 @@ #' If no missing value is specified, the function defaults to replacing all #' blanks with "NA". #' -#' One exception is if a .csv contains NO data (i.e. just column names and no -#' data in any of the cells). In this case, the blanks will not be replaced with -#' NA (as the function cannot determine how many NAs to include). +#' Please keep in mind that "missing" is a general term for all data +#' not present in the data file or data package.
Although you may have a very +#' good reason for not providing data and that data may not, from the data +#' package creator's perspective, be "missing" (maybe you never intended to +#' collect it) from a data package user's perspective any data that is not in +#' the data package is effectively "missing" from the data package. Therefore, +#' it is critical to document in metadata any data that are absent with an +#' appropriate "missingValueCode" and "missingValueDefinition". These terms are +#' defined by the metadata schema and are broadly used to apply to any data not +#' present. +#' +#' This function will replace all empty cells and all cells with NA with a +#' "missingValueCode" of your choice (although it defaults to NA). +#' +#' @details One exception is if a .csv contains NO data (i.e. just column names +#' and no data in any of the cells). In this case, the blanks will not be +#' replaced with NA (as the function cannot determine how many NAs to include). #' #' @param directory String. Path to the file(s) to have blanks replaced with #' NAs. Defaults to the working directory of the project (here::here()) diff --git a/docs/news/index.html b/docs/news/index.html index 53daa97..af13b37 100644 --- a/docs/news/index.html +++ b/docs/news/index.html @@ -3,7 +3,7 @@ - +
- +
@@ -63,6 +63,10 @@

Changelog

Source: NEWS.md
+
+ +

2024-06-27 * bug fixes for generate_ll_from_utm() * add function remove_empty_tables() (and associated unit tests) * update documentation for replace_blanks() to indicate it can replace blanks with more than just NA

+

2024-05-08 * Updated the replace_blanks() function to accept any missing value code a user inputs (but it still defaults to NA). 2024-04-18 * Added the function generate_ll_from_utm() which supersedes convert_utm_to_ll() and improves upon it in several ways, included accepting a column of UTMs and also returns a column of CRS along with the decimal degrees latitude and longitude. 2024-04-17 * Major updates to the DRR template including: using snake case instead of camel case for variables; updating Table 3 to only display filenames only when there are multiple files, fixed multiple issues with footnotes, added citations to NPSdataverse packages, added a section that prints the R code needed to download the data package and load it in to R. * Updated the DRR documentation to account for new variable names.

@@ -163,10 +167,10 @@
- - + + diff --git a/docs/pkgdown.yml b/docs/pkgdown.yml index eb4fb5a..96f20dc 100644 --- a/docs/pkgdown.yml +++ b/docs/pkgdown.yml @@ -5,5 +5,7 @@ articles: DRR_Purpose_and_Scope: DRR_Purpose_and_Scope.html Starting-a-DRR: Starting-a-DRR.html Using-the-DRR-Template: Using-the-DRR-Template.html + last_built: 2024-06-27T19:41Z + diff --git a/docs/reference/convert_datetime_format.html b/docs/reference/convert_datetime_format.html index 640a906..039e2c7 100644 --- a/docs/reference/convert_datetime_format.html +++ b/docs/reference/convert_datetime_format.html @@ -3,7 +3,7 @@ - +
- +
@@ -84,7 +84,7 @@

Arguments

Value

- +

A character vector of date/time format strings that can be parsed by readr or strptime.

@@ -119,10 +119,10 @@

Examples

- - + + diff --git a/docs/reference/index.html b/docs/reference/index.html index 4df6a32..2be0e41 100644 --- a/docs/reference/index.html +++ b/docs/reference/index.html @@ -3,7 +3,7 @@ - +
- +
@@ -131,13 +131,13 @@

All functions

Ordering Columns Function 03-21-2023

-

removeEmptyTables()

+

remove_empty_tables()

Remove empty tables from a list

replace_blanks()

-

Replaces all blank cells with NA

+

Replaces all blank cells with a missing value code of your choice

validate_coord()

@@ -164,10 +164,10 @@

All functions

- - + + diff --git a/docs/reference/removeEmptyTables.html b/docs/reference/remove_empty_tables.html similarity index 61% rename from docs/reference/removeEmptyTables.html rename to docs/reference/remove_empty_tables.html index 7f0fc3a..02a245c 100644 --- a/docs/reference/removeEmptyTables.html +++ b/docs/reference/remove_empty_tables.html @@ -1,5 +1,5 @@ -Remove empty tables from a list — removeEmptyTables • QCkitRemove empty tables from a list — remove_empty_tables • QCkit @@ -61,7 +61,7 @@
@@ -69,7 +69,7 @@

Remove empty tables from a list

-
removeEmptyTables(df_list)
+
remove_empty_tables(df_list)
@@ -92,7 +92,7 @@

Examples

item_b = mtcars, item_c = iris) -tidy_list <- removeEmptyTables(test_list) +tidy_list <- remove_empty_tables(test_list)
diff --git a/docs/reference/replace_blanks.html b/docs/reference/replace_blanks.html index c04c91b..ec7dd61 100644 --- a/docs/reference/replace_blanks.html +++ b/docs/reference/replace_blanks.html @@ -1,9 +1,29 @@ -Replaces all blank cells with NA — replace_blanks • QCkitReplaces all blank cells with a missing value code of your choice — replace_blanks • QCkit - +
- +
-

Replaces all blank cells with NA

+

replace_blanks() is particularly useful for exporting data +from a database (such as access) and converting it to a data package with +metadata.

+

replace_blanks() will import all .csv files in the specified working +directory. The files are then written back out to the same directory, +overwriting the old .csv files. Any blank cells (or cells with "NA" in the +original .csv files) will be replaced with the specified string or integer. +If no missing value is specified, the function defaults to replacing all +blanks with "NA".

+

Please keep in mind that "missing" is a general term for all data +not present in the data file or data package. Although you may have a very +good reason for not providing data and that data may not, from the data +package creator's perspective, be "missing" (maybe you never intended to +collect it) from a data package user's perspective any data that is not in +the data package is effectively "missing" from the data package. Therefore, +it is critical to document in metadata any data that are absent with an +appropriate "missingValueCode" and "missingValueDefinition". These terms are +defined by the metadata schema and are broadly used to apply to any data not +present.

+

This function will replace all empty cells and all cells with NA with a +"missingValueCode" of your choice (although it defaults to NA).

@@ -85,23 +125,15 @@

Arguments

Value

- +

list of data frames (invisibly)

Details

-

replace_blanks() is particularly useful for exporting data from a -database (such as access) and converting it to a data package with metadata.

-

replace_blanks() will import all .csv files in the specified working -directory. The files are then written back out to the same directory, -overwriting the old .csv files. Any blank cells (or cells with "NA" in the -original .csv files) will be replaced with the specified string or integer. -If no missing value is specified, the function defaults to replacing all -blanks with "NA".

-

One exception is if a .csv contains NO data (i.e. just column names and no -data in any of the cells). In this case, the blanks will not be replaced with -NA (as the function cannot determine how many NAs to include).

+

One exception is if a .csv contains NO data (i.e. just column names +and no data in any of the cells). In this case, the blanks will not be +replaced with NA (as the function cannot determine how many NAs to include).

@@ -136,10 +168,10 @@

Examples

- - + + diff --git a/docs/sitemap.xml b/docs/sitemap.xml index 0161483..7247e4b 100644 --- a/docs/sitemap.xml +++ b/docs/sitemap.xml @@ -100,7 +100,7 @@ /reference/QCkit-package.html - /reference/removeEmptyTables.html + /reference/remove_empty_tables.html /reference/replace_blanks.html diff --git a/man/removeEmptyTables.Rd b/man/remove_empty_tables.Rd similarity index 77% rename from man/removeEmptyTables.Rd rename to man/remove_empty_tables.Rd index fdbd2e1..8b6b14a 100644 --- a/man/removeEmptyTables.Rd +++ b/man/remove_empty_tables.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/remove_empty_tables.R -\name{removeEmptyTables} -\alias{removeEmptyTables} +\name{remove_empty_tables} +\alias{remove_empty_tables} \title{Remove empty tables from a list} \usage{ -removeEmptyTables(df_list) +remove_empty_tables(df_list) } \arguments{ \item{df_list}{A list of tibbles or dataframes.} @@ -21,6 +21,6 @@ test_list <- list(item_a = tibble::tibble, item_b = mtcars, item_c = iris) -tidy_list <- removeEmptyTables(test_list) +tidy_list <- remove_empty_tables(test_list) } diff --git a/man/replace_blanks.Rd b/man/replace_blanks.Rd index b331ff6..3d3a5ad 100644 --- a/man/replace_blanks.Rd +++ b/man/replace_blanks.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/replace_blanks.R \name{replace_blanks} \alias{replace_blanks} -\title{Replaces all blank cells with NA} +\title{Replaces all blank cells with a missing value code of your choice} \usage{ replace_blanks(directory = here::here(), missing_val_code = NA) } @@ -16,11 +16,9 @@ NAs. Defaults to the working directory of the project (here::here())} list of data frames (invisibly) } \description{ -Replaces all blank cells with NA -} -\details{ -\code{replace_blanks()} is particularly useful for exporting data from a -database (such as access) and converting it to a data package with metadata.
+\code{replace_blanks()} is particularly useful for exporting data +from a database (such as access) and converting it to a data package with +metadata. \code{replace_blanks()} will import all .csv files in the specified working directory. The files are then written back out to the same directory, @@ -29,9 +27,24 @@ original .csv files) will be replaced with the specified string or integer. If no missing value is specified, the function defaults to replacing all blanks with "NA". -One exception is if a .csv contains NO data (i.e. just column names and no -data in any of the cells). In this case, the blanks will not be replaced with -NA (as the function cannot determine how many NAs to include). +Please keep in mind that "missing" is a general term for all data +not present in the data file or data package. Although you may have a very +good reason for not providing data and that data may not, from the data +package creator's perspective, be "missing" (maybe you never intended to +collect it) from a data package user's perspective any data that is not in +the data package is effectively "missing" from the data package. Therefore, +it is critical to document in metadata any data that are absent with an +appropriate "missingValueCode" and "missingValueDefinition". These terms are +defined by the metadata schema and are broadly used to apply to any data not +present. + +This function will replace all empty cells and all cells with NA with a +"missingValueCode" of your choice (although it defaults to NA). +} +\details{ +One exception is if a .csv contains NO data (i.e. just column names +and no data in any of the cells). In this case, the blanks will not be +replaced with NA (as the function cannot determine how many NAs to include).
} \examples{ \dontrun{ diff --git a/tests/testthat/test-removeEmptyTables.R b/tests/testthat/test-removeEmptyTables.R deleted file mode 100644 index b929627..0000000 --- a/tests/testthat/test-removeEmptyTables.R +++ /dev/null @@ -1,15 +0,0 @@ -test_that("removeEmptyTables works", { - my_list <- list(item_a = tibble::tibble(), - item_b = mtcars, - item_c = iris) - expect_warning(removeEmptyTables(my_list)) - expect_equal(names(suppressWarnings(removeEmptyTables(my_list))), c("item_b", "item_c")) - - my_list$item_d <- tibble::tibble() - expect_warning(removeEmptyTables(my_list)) - expect_equal(names(suppressWarnings(removeEmptyTables(my_list))), c("item_b", "item_c")) - - tidy_list <- suppressWarnings(removeEmptyTables(my_list)) - expect_no_warning(removeEmptyTables(tidy_list)) - expect_equal(removeEmptyTables(tidy_list), tidy_list) -}) diff --git a/tests/testthat/test-remove_empty_tables.R b/tests/testthat/test-remove_empty_tables.R new file mode 100644 index 0000000..d7d372f --- /dev/null +++ b/tests/testthat/test-remove_empty_tables.R @@ -0,0 +1,15 @@ +test_that("remove_empty_tables works", { + my_list <- list(item_a = tibble::tibble(), + item_b = mtcars, + item_c = iris) + expect_warning(remove_empty_tables(my_list)) + expect_equal(names(suppressWarnings(remove_empty_tables(my_list))), c("item_b", "item_c")) + + my_list$item_d <- tibble::tibble() + expect_warning(remove_empty_tables(my_list)) + expect_equal(names(suppressWarnings(remove_empty_tables(my_list))), c("item_b", "item_c")) + + tidy_list <- suppressWarnings(remove_empty_tables(my_list)) + expect_no_warning(remove_empty_tables(tidy_list)) + expect_equal(remove_empty_tables(tidy_list), tidy_list) +}) \ No newline at end of file