From 1e5aec87ebca11381fc7346160c4bb23ca339b2a Mon Sep 17 00:00:00 2001 From: Wright Date: Wed, 15 Nov 2023 17:37:51 -0700 Subject: [PATCH 1/8] Fix missing namespaces --- R/elevation.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/elevation.R b/R/elevation.R index 61a681f..1fb01c1 100644 --- a/R/elevation.R +++ b/R/elevation.R @@ -37,11 +37,11 @@ get_elevation <- function(df, #test & warn for correct lat/long specification: if(force == FALSE){ - lat_test <- df2 %>% filter(df2[,1] < 0) + lat_test <- df2 %>% dplyr::filter(df2[,1] < 0) if(nrow(lat_test > 0)){ cat("Some latitudes appear to be below the equator. Make sure you correctly designated latitude and longitude.\n") } - long_test <- df2 %>% filter(df2[,2] > 0) + long_test <- df2 %>% dplyr::filter(df2[,2] > 0) if(nrow(long_test > 0)){ cat("Some latitudes appear to be in the Eastern Hemisphere. Makre sure you correctly designated latitude and longitude.\n") } From 5b637ad23c45d5285b0417cab361d8142b3f03c1 Mon Sep 17 00:00:00 2001 From: Wright Date: Wed, 15 Nov 2023 17:38:50 -0700 Subject: [PATCH 2/8] Add function to fix formatting of UTC offset in datetime strings --- R/dates_and_times.R | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 R/dates_and_times.R diff --git a/R/dates_and_times.R b/R/dates_and_times.R new file mode 100644 index 0000000..1839e72 --- /dev/null +++ b/R/dates_and_times.R @@ -0,0 +1,41 @@ +#' Fix UTC offset strings +#' +#' UTC offsets can be formatted in multiple ways (e.g. -07, -07:00, -0700) and R often struggles to parse these offsets. This function takes date/time strings with valid UTC offsets, and formats them so that they are consistent and readable by R. +#' +#' @param datetime_strings Character vector of dates in ISO 8601 format +#' +#' @return datetime_strings with UTC offsets consistently formatted to four digits (e.g. "2023-11-16T03:32:49-0700"). 
+#' @export +#' +#' @examples +#' datetimes <- c("2023-11-16T03:32:49+07:00","2023-11-16T03:32:49-07","2023-11-16T03:32:49","2023-11-16T03:32:49Z") +#' fix_utc_offset(datetimes) # returns c("2023-11-16T03:32:49+0700", "2023-11-16T03:32:49-0700", "2023-11-16T03:32:49", "2023-11-16T03:32:49+0000") and warns about missing offset (see third element) +#' +fix_utc_offset <- function(datetime_strings) { + datetime_strings <- stringr::str_replace_all(datetime_strings, "[−‐‑‒–—―﹘﹣-]", "-") # replace every possible type of dash with a regular minus sign + + # get UTC offset and format it as 4 digits with no special characters (e.g. 0700) + new_offsets <- datetime_strings %>% + stringr::str_extract("[Zz]|((?<=[+-])[0-9]{1,2}:?[0-9]{0,2})$") %>% + stringr::str_remove(":") %>% + stringr::str_replace("[Zz]", "0000") + new_offsets <- dplyr::case_when(nchar(new_offsets) == 1 ~ paste0("0", new_offsets, "00"), + nchar(new_offsets) == 2 ~ paste0(new_offsets, "00"), + nchar(new_offsets) == 4 ~ new_offsets, + .default = "") + if (any(new_offsets == "")) { + warning("Date strings contain missing or invalid UTC offsets") + } + + # remove old UTC offsets from date strings + datetime_strings <- datetime_strings %>% + stringr::str_remove("(?<=[+-])[0-9]{1,2}:?[0-9]{0,2}$") %>% + stringr::str_replace("[Zz](?=$)", "+") + + # add new UTC offsets + datetime_strings <- paste0(datetime_strings, new_offsets) %>% + stringr::str_remove("[+-](?=$)") # Remove trailing + or - where invalid offsets were removed + + return(datetime_strings) +} + From 19f2d12ee05e96934a74ba3917dd4b9546b76ff2 Mon Sep 17 00:00:00 2001 From: Wright Date: Wed, 15 Nov 2023 17:39:10 -0700 Subject: [PATCH 3/8] Move convert_datetime_format from DPchecker to QCkit --- R/dates_and_times.R | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/R/dates_and_times.R b/R/dates_and_times.R index 1839e72..fd5c4ba 100644 --- a/R/dates_and_times.R +++ b/R/dates_and_times.R @@ -39,3 +39,32 @@ fix_utc_offset <- 
function(datetime_strings) { return(datetime_strings) } +#' Convert EML date/time format string to one that R can parse +#' +#' @details `convert_datetime_format()` is not a sophisticated function. If the EML format string is not valid, it will happily and without complaint return an R format string that will break your code. You have been warned. +#' +#' @param eml_format_string A character vector of EML date/time format strings. This function understands the following codes: YYYY = four digit year, YY = two digit year, MMM = three letter month abbrev., MM = two digit month, DD = two digit day, hh or HH = 24 hour time, mm = minutes, ss or SS = seconds. +#' +#' @return A character vector of date/time format strings that can be parsed by `readr` or `strptime`. +#' @export +#' +#' @examples +#' convert_datetime_format("MM/DD/YYYY") +#' convert_datetime_format(c("MM/DD/YYYY", "YY-MM-DD")) +#' +convert_datetime_format <- function(eml_format_string) { + r_format_string <- eml_format_string %>% + stringr::str_replace_all("YYYY", "%Y") %>% + stringr::str_replace_all("YY", "%y") %>% + stringr::str_replace_all("MMM", "%b") %>% + stringr::str_replace_all("MM", "%m") %>% + stringr::str_replace_all("DD", "%d") %>% + stringr::str_replace_all("(hh)|(HH)", "%H") %>% + stringr::str_replace_all("mm", "%M") %>% + stringr::str_replace_all("(ss)|(SS)", "%S") %>% + stringr::str_replace_all("(?<!%)M", "%m") %>% # Replace M with %m, but leave %M alone + stringr::str_replace_all("D", "%d") + #stringr::str_replace_all("T", " ") + + return(r_format_string) +} \ No newline at end of file From 57c6894b6b30eb6f65309c5fdcb61b0bf008505d Mon Sep 17 00:00:00 2001 From: Wright Date: Wed, 15 Nov 2023 17:50:22 -0700 Subject: [PATCH 4/8] Add documentation for convert_datetime_format and fix_utc_offset --- NAMESPACE | 2 ++ man/convert_datetime_format.Rd | 25 +++++++++++++++++++++++++ man/fix_utc_offset.Rd | 22 ++++++++++++++++++++++ 3 files changed, 49 insertions(+) create mode 100644 man/convert_datetime_format.Rd 
create mode 100644 man/fix_utc_offset.Rd diff --git a/NAMESPACE b/NAMESPACE index 2e341c3..b08a7b3 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -3,8 +3,10 @@ export(DC_col_check) export(check_dc_cols) export(check_te) +export(convert_datetime_format) export(convert_long_to_utm) export(convert_utm_to_ll) +export(fix_utc_offset) export(fuzz_location) export(get_custom_flags) export(get_dc_flags) diff --git a/man/convert_datetime_format.Rd b/man/convert_datetime_format.Rd new file mode 100644 index 0000000..3821086 --- /dev/null +++ b/man/convert_datetime_format.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dates_and_times.R +\name{convert_datetime_format} +\alias{convert_datetime_format} +\title{Convert EML date/time format string to one that R can parse} +\usage{ +convert_datetime_format(eml_format_string) +} +\arguments{ +\item{eml_format_string}{A character vector of EML date/time format strings. This function understands the following codes: YYYY = four digit year, YY = two digit year, MMM = three letter month abbrev., MM = two digit month, DD = two digit day, hh or HH = 24 hour time, mm = minutes, ss or SS = seconds.} +} +\value{ +A character vector of date/time format strings that can be parsed by \code{readr} or \code{strptime}. +} +\description{ +Convert EML date/time format string to one that R can parse +} +\details{ +\code{convert_datetime_format()} is not a sophisticated function. If the EML format string is not valid, it will happily and without complaint return an R format string that will break your code. You have been warned. 
+} +\examples{ +convert_datetime_format("MM/DD/YYYY") +convert_datetime_format(c("MM/DD/YYYY", "YY-MM-DD")) + +} diff --git a/man/fix_utc_offset.Rd b/man/fix_utc_offset.Rd new file mode 100644 index 0000000..4805b92 --- /dev/null +++ b/man/fix_utc_offset.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dates_and_times.R +\name{fix_utc_offset} +\alias{fix_utc_offset} +\title{Fix UTC offset strings} +\usage{ +fix_utc_offset(datetime_strings) +} +\arguments{ +\item{datetime_strings}{Character vector of dates in ISO 8601 format} +} +\value{ +datetime_strings with UTC offsets consistently formatted to four digits (e.g. "2023-11-16T03:32:49-0700"). +} +\description{ +UTC offsets can be formatted in multiple ways (e.g. -07, -07:00, -0700) and R often struggles to parse these offsets. This function takes date/time strings with valid UTC offsets, and formats them so that they are consistent and readable by R. +} +\examples{ +datetimes <- c("2023-11-16T03:32:49+07:00","2023-11-16T03:32:49-07","2023-11-16T03:32:49","2023-11-16T03:32:49Z") +fix_utc_offset(datetimes) # returns c("2023-11-16T03:32:49+0700", "2023-11-16T03:32:49-0700", "2023-11-16T03:32:49", "2023-11-16T03:32:49+0000") and warns about missing offset (see third element) + +} From dc4fd33b98a38e846131f42125662e3f945519e7 Mon Sep 17 00:00:00 2001 From: Wright Date: Wed, 15 Nov 2023 17:50:50 -0700 Subject: [PATCH 5/8] set project to use roxygen by default --- QCkit.Rproj | 1 + 1 file changed, 1 insertion(+) diff --git a/QCkit.Rproj b/QCkit.Rproj index 21a4da0..eaa6b81 100644 --- a/QCkit.Rproj +++ b/QCkit.Rproj @@ -15,3 +15,4 @@ LaTeX: pdfLaTeX BuildType: Package PackageUseDevtools: Yes PackageInstallArgs: --no-multiarch --with-keep.source +PackageRoxygenize: rd,collate,namespace From d9be4d5771883c0cc3f97689893d649194a049cd Mon Sep 17 00:00:00 2001 From: Wright Date: Wed, 15 Nov 2023 18:09:12 -0700 Subject: [PATCH 6/8] add regex to replace UTC offset indicators with 
%z --- R/dates_and_times.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/dates_and_times.R b/R/dates_and_times.R index fd5c4ba..78f0c09 100644 --- a/R/dates_and_times.R +++ b/R/dates_and_times.R @@ -63,7 +63,8 @@ convert_datetime_format <- function(eml_format_string) { stringr::str_replace_all("mm", "%M") %>% stringr::str_replace_all("(ss)|(SS)", "%S") %>% stringr::str_replace_all("(?<!%)M", "%m") %>% # Replace M with %m, but leave %M alone - stringr::str_replace_all("D", "%d") + stringr::str_replace_all("D", "%d") %>% + stringr::str_replace_all("[+-][Hh]{1,4}$", "%z") #stringr::str_replace_all("T", " ") return(r_format_string) From d9a0fd64697a86f079f48370020564b7b8a9e8c5 Mon Sep 17 00:00:00 2001 From: Wright Date: Fri, 17 Nov 2023 09:32:03 -0700 Subject: [PATCH 7/8] Add option to replace "Z" in date/time format string with "%z" Only works if fix_utc_offset is used when reading date/time strings --- R/dates_and_times.R | 15 ++++++++++----- man/convert_datetime_format.Rd | 8 +++++--- 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/R/dates_and_times.R b/R/dates_and_times.R index 78f0c09..416f237 100644 --- a/R/dates_and_times.R +++ b/R/dates_and_times.R @@ -41,9 +41,10 @@ fix_utc_offset <- function(datetime_strings) { #' Convert EML date/time format string to one that R can parse #' -#' @details `convert_datetime_format()` is not a sophisticated function. If the EML format string is not valid, it will happily and without complaint return an R format string that will break your code. You have been warned. +#' @details `convert_datetime_format()` is not a sophisticated function. If the EML format string is not valid, it will happily and without complaint return an R format string that will break your code. You have been warned. 
Note that UTC offset formats using a colon or only two digits will be parsed by this function, but if parsing datetime values from strings, you will also need to use `fix_utc_offset` to change the UTC offsets to the +/-hhhh format that R can read. #' -#' @param eml_format_string A character vector of EML date/time format strings. This function understands the following codes: YYYY = four digit year, YY = two digit year, MMM = three letter month abbrev., MM = two digit month, DD = two digit day, hh or HH = 24 hour time, mm = minutes, ss or SS = seconds. +#' @param eml_format_string A character vector of EML date/time format strings. This function understands the following codes: YYYY = four digit year, YY = two digit year, MMM = three letter month abbrev., MM = two digit month, DD = two digit day, hh or HH = 24 hour time, mm = minutes, ss or SS = seconds, +/-hhhh or +/-HHHH = UTC offset. +#' @param convert_z Should a "Z" at the end of the format string (indicating UTC) be replaced by a "%z"? Only set to `TRUE` if you plan to use `fix_utc_offset` to change "Z" in datetime strings to "+0000". #' #' @return A character vector of date/time format strings that can be parsed by `readr` or `strptime`. 
#' @export @@ -52,20 +53,24 @@ fix_utc_offset <- function(datetime_strings) { #' convert_datetime_format("MM/DD/YYYY") #' convert_datetime_format(c("MM/DD/YYYY", "YY-MM-DD")) #' -convert_datetime_format <- function(eml_format_string) { +convert_datetime_format <- function(eml_format_string, convert_z = FALSE) { r_format_string <- eml_format_string %>% stringr::str_replace_all("YYYY", "%Y") %>% stringr::str_replace_all("YY", "%y") %>% stringr::str_replace_all("MMM", "%b") %>% stringr::str_replace_all("MM", "%m") %>% stringr::str_replace_all("DD", "%d") %>% - stringr::str_replace_all("(hh)|(HH)", "%H") %>% + stringr::str_replace_all("(?% stringr::str_replace_all("mm", "%M") %>% stringr::str_replace_all("(ss)|(SS)", "%S") %>% stringr::str_replace_all("(?% # Replace M with %m, but leave %M alone stringr::str_replace_all("D", "%d") %>% - stringr::str_replace_all("[+-][Hh]{1,4}$", "%z") + stringr::str_replace_all("[+-][Hh]{1,2}:?[Hh]{0,2}(?=$)", "%z") # Replace UTC offset format string (e.g. -hh, -hhhh, -hh:hh) with %z. Note that R seems to only parse UTC offsets when in the format +/-hhhh. #stringr::str_replace_all("T", " ") + if (convert_z) { + r_format_string <- stringr::str_replace(r_format_string, "Z(?=$)", "%z") + } + return(r_format_string) } \ No newline at end of file diff --git a/man/convert_datetime_format.Rd b/man/convert_datetime_format.Rd index 3821086..6bb501e 100644 --- a/man/convert_datetime_format.Rd +++ b/man/convert_datetime_format.Rd @@ -4,10 +4,12 @@ \alias{convert_datetime_format} \title{Convert EML date/time format string to one that R can parse} \usage{ -convert_datetime_format(eml_format_string) +convert_datetime_format(eml_format_string, convert_z = FALSE) } \arguments{ -\item{eml_format_string}{A character vector of EML date/time format strings. 
This function understands the following codes: YYYY = four digit year, YY = two digit year, MMM = three letter month abbrev., MM = two digit month, DD = two digit day, hh or HH = 24 hour time, mm = minutes, ss or SS = seconds.} +\item{eml_format_string}{A character vector of EML date/time format strings. This function understands the following codes: YYYY = four digit year, YY = two digit year, MMM = three letter month abbrev., MM = two digit month, DD = two digit day, hh or HH = 24 hour time, mm = minutes, ss or SS = seconds, +/-hhhh or +/-HHHH = UTC offset.} + +\item{convert_z}{Should a "Z" at the end of the format string (indicating UTC) be replaced by a "\%z"? Only set to \code{TRUE} if you plan to use \code{fix_utc_offset} to change "Z" in datetime strings to "+0000".} } \value{ A character vector of date/time format strings that can be parsed by \code{readr} or \code{strptime}. @@ -16,7 +18,7 @@ A character vector of date/time format strings that can be parsed by \code{readr Convert EML date/time format string to one that R can parse } \details{ -\code{convert_datetime_format()} is not a sophisticated function. If the EML format string is not valid, it will happily and without complaint return an R format string that will break your code. You have been warned. +\code{convert_datetime_format()} is not a sophisticated function. If the EML format string is not valid, it will happily and without complaint return an R format string that will break your code. You have been warned. Note that UTC offset formats using a colon or only two digits will be parsed by this function, but if parsing datetime values from strings, you will also need to use \code{fix_utc_offset} to change the UTC offsets to the +/-hhhh format that R can read. 
} \examples{ convert_datetime_format("MM/DD/YYYY") From c01575c6839689964335948a2309d86e25550cc1 Mon Sep 17 00:00:00 2001 From: Wright Date: Tue, 19 Dec 2023 14:03:10 -0700 Subject: [PATCH 8/8] Made list of invalid dashes even more readable --- R/dates_and_times.R | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/R/dates_and_times.R b/R/dates_and_times.R index 416f237..257fa54 100644 --- a/R/dates_and_times.R +++ b/R/dates_and_times.R @@ -1,5 +1,5 @@ #' Fix UTC offset strings -#' +#' #' UTC offsets can be formatted in multiple ways (e.g. -07, -07:00, -0700) and R often struggles to parse these offsets. This function takes date/time strings with valid UTC offsets, and formats them so that they are consistent and readable by R. #' #' @param datetime_strings Character vector of dates in ISO 8601 format @@ -10,10 +10,10 @@ #' @examples #' datetimes <- c("2023-11-16T03:32:49+07:00","2023-11-16T03:32:49-07","2023-11-16T03:32:49","2023-11-16T03:32:49Z") #' fix_utc_offset(datetimes) # returns c("2023-11-16T03:32:49+0700", "2023-11-16T03:32:49-0700", "2023-11-16T03:32:49", "2023-11-16T03:32:49+0000") and warns about missing offset (see third element) -#' +#' fix_utc_offset <- function(datetime_strings) { - datetime_strings <- stringr::str_replace_all(datetime_strings, "[−‐‑‒–—―﹘﹣-]", "-") # replace every possible type of dash with a regular minus sign - + datetime_strings <- stringr::str_replace_all(datetime_strings, "[\u2212\u2010\u2011\u2012\u2013\u2014\u2015\ufe58\ufe63\uff0d]", "-") # replace every possible type of dash with a regular minus sign + # get UTC offset and format it as 4 digits with no special characters (e.g. 
0700) new_offsets <- datetime_strings %>% stringr::str_extract("[Zz]|((?<=[+-])[0-9]{1,2}:?[0-9]{0,2})$") %>% @@ -26,16 +26,16 @@ fix_utc_offset <- function(datetime_strings) { if (any(new_offsets == "")) { warning("Date strings contain missing or invalid UTC offsets") } - + # remove old UTC offsets from date strings datetime_strings <- datetime_strings %>% stringr::str_remove("(?<=[+-])[0-9]{1,2}:?[0-9]{0,2}$") %>% stringr::str_replace("[Zz](?=$)", "+") - + # add new UTC offsets datetime_strings <- paste0(datetime_strings, new_offsets) %>% stringr::str_remove("[+-](?=$)") # Remove trailing + or - where invalid offsets were removed - + return(datetime_strings) } @@ -44,7 +44,7 @@ fix_utc_offset <- function(datetime_strings) { #' @details `convert_datetime_format()` is not a sophisticated function. If the EML format string is not valid, it will happily and without complaint return an R format string that will break your code. You have been warned. Note that UTC offset formats using a colon or only two digits will be parsed by this function, but if parsing datetime values from strings, you will also need to use `fix_utc_offset` to change the UTC offsets to the +/-hhhh format that R can read. #' #' @param eml_format_string A character vector of EML date/time format strings. This function understands the following codes: YYYY = four digit year, YY = two digit year, MMM = three letter month abbrev., MM = two digit month, DD = two digit day, hh or HH = 24 hour time, mm = minutes, ss or SS = seconds, +/-hhhh or +/-HHHH = UTC offset. -#' @param convert_z Should a "Z" at the end of the format string (indicating UTC) be replaced by a "%z"? Only set to `TRUE` if you plan to use `fix_utc_offset` to change "Z" in datetime strings to "+0000". +#' @param convert_z Should a "Z" at the end of the format string (indicating UTC) be replaced by a "%z"? Only set to `TRUE` if you plan to use `fix_utc_offset` to change "Z" in datetime strings to "+0000". 
#' #' @return A character vector of date/time format strings that can be parsed by `readr` or `strptime`. #' @export @@ -67,10 +67,10 @@ convert_datetime_format <- function(eml_format_string, convert_z = FALSE) { stringr::str_replace_all("D", "%d") %>% stringr::str_replace_all("[+-][Hh]{1,2}:?[Hh]{0,2}(?=$)", "%z") # Replace UTC offset format string (e.g. -hh, -hhhh, -hh:hh) with %z. Note that R seems to only parse UTC offsets when in the format +/-hhhh. #stringr::str_replace_all("T", " ") - + if (convert_z) { r_format_string <- stringr::str_replace(r_format_string, "Z(?=$)", "%z") } - + return(r_format_string) } \ No newline at end of file