diff --git a/.Rbuildignore b/.Rbuildignore index b614d7e..03e6439 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -6,6 +6,7 @@ .github/ ^docs$ ^_pkgdown\.yml$ +^pkgdown$ ^\.travis\.yml$ ^appveyor\.yml$ ^\.zenodo\.json$ diff --git a/.zenodo.json b/.zenodo.json index 24ea8a4..70fa8db 100644 --- a/.zenodo.json +++ b/.zenodo.json @@ -4,7 +4,7 @@ "license": "GPL-3.0", "upload_type": "software", "access_right": "open", - "version": "0.3.0", + "version": "0.4.0", "creators": [ { "name": "Vanderhaeghe, Floris", diff --git a/DESCRIPTION b/DESCRIPTION index 93b56d6..3ce55ae 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: watina Title: Querying And Processing Data From The INBO Watina Database -Version: 0.3.0 +Version: 0.4.0 Description: The R-package watina contains functions to query and process data from the Watina database at the Research Institute for Nature and Forest (INBO). This database primarily provides @@ -34,11 +34,12 @@ Imports: tidyr Suggests: knitr, - rmarkdown + purrr, + rmarkdown, + tidyselect Remotes: - inbo/inbodb, - florisvdh/dbplyr@dbplyr_with_pivot_wider + inbo/inbodb LazyData: true Encoding: UTF-8 -RoxygenNote: 7.1.0 +RoxygenNote: 7.1.1 VignetteBuilder: knitr diff --git a/NAMESPACE b/NAMESPACE index 041112f..9a40c7e 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -2,9 +2,8 @@ export(as_points) export(cluster_locs) -export(collect) export(connect_watina) -export(convertdf_enc) +export(dbDisconnect) export(eval_chem) export(eval_xg3_avail) export(eval_xg3_series) @@ -23,12 +22,12 @@ importFrom(assertthat,is.flag) importFrom(assertthat,is.number) importFrom(assertthat,is.string) importFrom(assertthat,noNA) -importFrom(dbplyr,db_pivot_wider) importFrom(dplyr,"%>%") importFrom(dplyr,anti_join) importFrom(dplyr,arrange) importFrom(dplyr,as_tibble) importFrom(dplyr,bind_cols) +importFrom(dplyr,collect) importFrom(dplyr,contains) importFrom(dplyr,copy_to) importFrom(dplyr,count) @@ -43,7 +42,6 @@ importFrom(dplyr,lead) importFrom(dplyr,left_join) importFrom(dplyr,mutate) importFrom(dplyr,mutate_at) -importFrom(dplyr,mutate_if) importFrom(dplyr,n) importFrom(dplyr,pull) importFrom(dplyr,rename) @@ -60,6 +58,7 @@ importFrom(dplyr,tribble) importFrom(dplyr,ungroup) importFrom(dplyr,vars) importFrom(inbodb,connect_inbo_dbase) +importFrom(inbodb,dbDisconnect) importFrom(lubridate,as_date) importFrom(lubridate,day) importFrom(lubridate,dmy) @@ -68,6 +67,14 @@ importFrom(lubridate,now) importFrom(lubridate,today) importFrom(lubridate,year) importFrom(rlang,.data) +importFrom(rlang,enquo) +importFrom(rlang,enquos) +importFrom(rlang,expr) +importFrom(rlang,flatten) +importFrom(rlang,quo_get_expr) +importFrom(rlang,set_names) +importFrom(rlang,sym) +importFrom(rlang,syms) importFrom(sf,st_as_sf) importFrom(sf,st_buffer) importFrom(sf,st_coordinates) diff --git a/NEWS.md b/NEWS.md index 1ff4ab5..4d46f7c 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,20 @@ -# watina 0.3.0 +# watina 0.4.0 (2021-01-18) + +- This release has been made compatible with `dbplyr` 2.0.0 (on CRAN); the `dbplyr` fork is not needed anymore ([e66e58f](https://github.com/inbo/watina/commit/e66e58f), #74). + - Follow the installation procedure on the homepage (readme) to upgrade. + - Lazy results of `get_locs()`, `get_xg3()` and `get_chem()` are not sorted anymore. + Sorting is done in tibbles only, i.e. if `collect = TRUE`. + For more information, consult the _Note_ added in the documentation of these functions. +- Fix broken `selectlocs_xg3()` and `selectlocs_chem()` (#73). +- Fix `get_locs()` error when no locations remain after spatial masking ([c96421e](https://github.com/inbo/watina/commit/c96421e)). +- Fix `get_locs()` error when `obswell_aggr = "mean"` (#80). +- Adopt further [`inbodb`](https://inbo.github.io/inbodb) functionality (#75): + - re-export its `dbDisconnect()`; + - transfer handling of character encoding to `inbodb`. +- Improve documentation and `pkgdown` website (#76, #77, [3986b4e](https://github.com/inbo/watina/commit/3986b4e)). + + +# watina 0.3.0 (2020-05-20) #### New features @@ -14,34 +30,34 @@ Further, a number of smaller fixes and enhancements were made. -# watina 0.2.6 +# watina 0.2.6 (2020-02-28) - Redo fix `get_locs()`: calculation of `soilsurf_ost` (#43) -# watina 0.2.5 +# watina 0.2.5 (2020-02-27) - Bugfix in `get_locs()`: calculation of `soilsurf_ost` (#42) -# watina 0.2.4 +# watina 0.2.4 (2020-01-29) - Bugfix in `convertdf_enc()` (#34) - Some small documentation improvements -# watina 0.2.3 +# watina 0.2.3 (2020-01-09) - Documentation is now generated by the newer `roxygen2` version `7.0.2`, resulting in a better layout of function arguments in the 'usage' section. -# watina 0.2.2 +# watina 0.2.2 (2019-11-04) - On Windows, the functions now convert 'weird' characters from the database to proper UTF-8. - Bugfix (#29) in `get_locs()` regarding the default implementation of the `loc_validity` argument. -# watina 0.2.1 +# watina 0.2.1 (2019-10-14) - Fixed bug in some hyperlinks in function documentation, affecting the installation process on Windows (warnings were thrown). -# watina 0.2.0 +# watina 0.2.0 (2019-10-02) #### New features diff --git a/R/cluster.R b/R/cluster.R index 7121a97..d237372 100644 --- a/R/cluster.R +++ b/R/cluster.R @@ -81,7 +81,7 @@ #' table #' #' # Disconnect: -#' DBI::dbDisconnect(watina) +#' dbDisconnect(watina) #' } #' #' @export diff --git a/R/connect_watina.R b/R/connect_watina.R index 42c8c10..2ff27ec 100644 --- a/R/connect_watina.R +++ b/R/connect_watina.R @@ -4,7 +4,7 @@ #' The function can only be used from within the INBO network. #' #' Don't forget to disconnect at the end of your R-script using -#' \code{\link[DBI:dbDisconnect]{DBI::dbDisconnect()}}! +#' \code{\link{dbDisconnect}}! #' #' @return #' A \code{DBIConnection} object. @@ -14,12 +14,26 @@ #' watina <- connect_watina() #' # Do your stuff. #' # Disconnect: -#' DBI::dbDisconnect(watina) +#' dbDisconnect(watina) #' } #' #' @export #' @importFrom inbodb connect_inbo_dbase connect_watina <- function() { - connect_inbo_dbase("W0002_00_Watina") + connect_inbo_dbase("W0002_00_Watina", + autoconvert_utf8 = TRUE) } + + +#' Disconnect a database connection +#' +#' This is a re-export of +#' \code{\link[inbodb:dbDisconnect-OdbcConnection-method]{inbodb::dbDisconnect()}} +#' (\href{https://inbo.github.io/inbodb/reference/dbDisconnect-OdbcConnection-method.html}{url}). +#' +#' @name dbDisconnect +#' @keywords documentation +#' @importFrom inbodb dbDisconnect +#' @export dbDisconnect +NULL diff --git a/R/db_pivot_wider.R b/R/db_pivot_wider.R new file mode 100644 index 0000000..2921e77 --- /dev/null +++ b/R/db_pivot_wider.R @@ -0,0 +1,82 @@ +# NOTE: all credits for this db_pivot_wider function go to Edgar Ruiz. +# He made a PR where this function was proposed to incorporate in dbplyr: +# https://github.com/tidyverse/dbplyr/pull/344 +# The code below is a copy and the terms of its original code source apply. +# Minor modifications were made in the approach to import rlang functions and to +# avoid global variables. + +#' @importFrom rlang +#' .data +#' sym +#' syms +#' set_names +#' flatten +#' enquo +#' enquos +#' quo_get_expr +#' expr +#' @keywords internal +db_pivot_wider <- function(data, + id_cols = NULL, + names_from = .data$name, + names_prefix = "", + names_sep = NULL, + names_repair = NULL, + values_from = .data$value, + values_fill = NULL, + values_fn = NULL, + spec = NULL) { + if (!requireNamespace("tidyselect", quietly = TRUE)) { + stop("Package \"tidyselect\" is needed when using this function. ", + "Please install it.", + call. = FALSE) + } + if (!requireNamespace("purrr", quietly = TRUE)) { + stop("Package \"purrr\" is needed when using this function. ", + "Please install it.", + call. = FALSE) + } + check_null_pivot_args( + id_cols = !!id_cols, names_sep = !!names_sep, + names_repair = !!names_repair, values_fill = !!values_fill, + values_fn = !!values_fn, spec = !!spec + ) + cn <- colnames(data) + names_from <- tidyselect::vars_select(cn, !!enquo(names_from)) + values_from <- tidyselect::vars_select(cn, !!enquo(values_from)) + pl <- c(values_from, names_from) + kp <- cn[!(cn %in% pl)] + headers <- pull(summarise(group_by(data, !!sym(names_from)))) + mt <- purrr::map( + headers, + ~ { + header <- .x + purrr::map( + values_from, + ~ expr(max(ifelse(!!sym(names_from) == !!header, !!sym(.x), NA), na.rm = TRUE)) + ) + } + ) + fmt <- flatten(mt) + if (length(values_from) > 1) { + vp <- paste0(values_from, "_") + } else { + vp <- "" + } + hn <- purrr::map(headers, ~ paste0(vp, names_prefix, .x)) + rhn <- purrr::reduce(hn, c) + nmt <- set_names(fmt, rhn) + grps <- group_by(data, !!!syms(kp)) + summarise(grps, !!!nmt) +} + +check_null_pivot_args <- function(..., msg = "The `{arg}` argument is not supported for remote back-ends") { + vars <- enquos(...) + purrr::imap( + vars, + ~ assert_that( + is.null(quo_get_expr(.x)), + msg = sub("\\{arg\\}", .y, msg) + ) + ) +} diff --git a/R/encode_utf8.R b/R/encode_utf8.R deleted file mode 100644 index 4c6f0c4..0000000 --- a/R/encode_utf8.R +++ /dev/null @@ -1,104 +0,0 @@ -#' Convert encoding of character and factor variables in a dataframe -#' -#' @details -#' Encoding strings: all \code{R} platforms support \code{""} (for the -#' encoding of the current -#' locale), \code{"latin1"} and \code{"UTF-8"}. -#' See \code{\link[base]{iconv}} for more information. -#' -#' @param x A dataframe or an object (such as `sf`) with the `data.frame` -#' class -#' -#' @inheritParams base::iconv -#' -#' @md -#' -#' @return -#' The original object, with character variables (and levels of -#' (character) factor variables) converted to the specified encoding. -#' -#' @export -#' @importFrom dplyr -#' %>% -#' mutate_if -#' @importFrom assertthat -#' assert_that -#' is.string -convertdf_enc <- function(x, - from = "", - to = "UTF-8", - sub = NA) { - - assert_that(inherits(x, "data.frame")) - assert_that(is.string(to)) - - is_chfact <- function(vec) { - if (is.factor(vec)) { - is.character(levels(vec)) - } else FALSE - } - - conv_levels <- function(fact, from, to, sub) { - levels(fact) <- iconv(levels(fact), - from = from, - to = to, - sub = sub) - return(fact) - } - - x %>% - mutate_if(is.character, - iconv, - from = from, - to = to, - sub = sub) %>% - mutate_if(is_chfact, - conv_levels, - from = from, - to = to, - sub = sub) - -} - - - - - - - -#' A variant of dplyr's collect() which converts dataframes to UTF-8 encoding -#' if OS is Windows -#' -#' Works as a simple \code{\link[dplyr:compute]{collect()}}, on which it is -#' based. -#' However, on a Windows OS the \code{collect()} function exported -#' by this package will convert character and factor -#' variables of dataframes to \code{UTF-8} encoding. -#' -#' The functions in this package that \emph{collect} a \code{tbl_lazy} object, -#' e.g. when \code{collect = TRUE}, do so by using this function. -#' As a convenience to the user, the function is exported to allow manual -#' implementation. -#' -#' The function \code{\link[=convertdf_enc]{convertdf_enc()}} is the workhorse -#' for the encoding conversion. -#' -#' @inheritParams dplyr::collect -#' -#' @export -#' @importFrom dplyr -#' %>% -collect <- function(x) { - result <- - dplyr::collect(x) - - if (.Platform$OS.type == "windows") { - result <- - result %>% - convertdf_enc(to = "UTF-8") - } - - return(result) -} - - diff --git a/R/eval.R b/R/eval.R index f8060a3..87e01e9 100644 --- a/R/eval.R +++ b/R/eval.R @@ -33,11 +33,11 @@ #' mydata <- #' mylocs %>% #' get_xg3(watina, 2014) -#' mydata +#' mydata %>% arrange(loc_code, hydroyear) #' eval_xg3_avail(mydata, #' xg3_type = c("L", "V")) #' # Disconnect: -#' DBI::dbDisconnect(watina) +#' dbDisconnect(watina) #' } #' #' @@ -200,6 +200,7 @@ filter_xg3 <- function(data, #' contains #' vars #' mutate_at +#' collect qualify_xg3 <- function(data, xg3_type) { @@ -340,13 +341,13 @@ qualify_xg3 <- function(data, #' mydata <- #' mylocs %>% #' get_xg3(watina, 1900) -#' mydata +#' mydata %>% arrange(loc_code, hydroyear) #' mydata %>% #' eval_xg3_series(xg3_type = c("L", "V"), #' max_gap = 2, #' min_dur = 5) #' # Disconnect: -#' DBI::dbDisconnect(watina) +#' dbDisconnect(watina) #' } #' #' @export @@ -375,6 +376,7 @@ qualify_xg3 <- function(data, #' n #' first #' ungroup +#' collect eval_xg3_series <- function(data, xg3_type = c("L", "H", "V"), max_gap, @@ -591,7 +593,7 @@ eval_xg3_series <- function(data, #' mydata <- #' mylocs %>% #' get_chem(watina, "1/1/2010") -#' mydata +#' mydata %>% arrange(loc_code, date, chem_variable) #' mydata %>% #' pull(date) %>% #' lubridate::year(.) %>% @@ -611,7 +613,7 @@ eval_xg3_series <- function(data, #' arrange(desc(loc_code)) %>% #' select(loc_code, chem_variable, pval_uniform_totalspan) #' # Disconnect: -#' DBI::dbDisconnect(watina) +#' dbDisconnect(watina) #' } #' #' @@ -631,6 +633,7 @@ eval_xg3_series <- function(data, #' left_join #' filter #' first +#' collect #' @importFrom tidyr #' spread #' gather @@ -676,7 +679,11 @@ date, lab_sample_id, chem_variable, value, unit, below_loq." filter(.data$chem_variable %in% chem_var) if (inherits(data, "tbl_lazy")) { - data <- collect(data) + data <- + collect(data) %>% + arrange(.data$loc_code, + .data$date, + .data$chem_variable) } if (!missing(chem_var)) { diff --git a/R/extract.R b/R/extract.R index 9bfc2ce..a19806b 100644 --- a/R/extract.R +++ b/R/extract.R @@ -69,13 +69,13 @@ #' mydata <- #' mylocs %>% #' get_xg3(watina, 1900) -#' mydata +#' mydata %>% arrange(loc_code, hydroyear) #' mydata %>% #' extract_xg3_series(xg3_type = c("L", "V"), #' max_gap = 2, #' min_dur = 5) #' # Disconnect: -#' DBI::dbDisconnect(watina) +#' dbDisconnect(watina) #' } #' #' @export diff --git a/R/get.R b/R/get.R index 62748ca..e18f3ca 100644 --- a/R/get.R +++ b/R/get.R @@ -139,6 +139,18 @@ #' retained observation well. #' It is ignored if \code{obswells = TRUE}. #' +#' @md +#' +#' @note +#' Up to and including `watina 0.3.0`, the result was sorted according to +#' `area_code` and `loc_code`, +#' both for the lazy query and the collected result. +#' Later versions avoid sorting in case of a lazy result, because +#' otherwise, when using the result inside another lazy query, this led to +#' 'ORDER BY' constructs in SQL subqueries, which must be avoided. +#' If you like to print the lazy object in a sorted manner, you must add +#' `%>% arrange(...)` yourself. +#' #' @param mask An optional geospatial filter of class \code{sf}. #' If provided, only locations that intersect with \code{mask} will be returned, #' with the value of \code{buffer} taken into account. @@ -199,7 +211,8 @@ #' bbox = c(xmin = 1.4e+5, #' xmax = 1.7e+5, #' ymin = 1.6e+5, -#' ymax = 1.9e+5)) +#' ymax = 1.9e+5)) %>% +#' arrange(area_code, loc_code) #' #' get_locs(watina, #' area_codes = c("KAL", "KBR"), @@ -262,22 +275,29 @@ #' head(12) #' #' # Selecting all piezometers with status VLD of the -#' # province "West-Vlaanderen": -#' data(BE_ADMIN_PROVINCE, -#' package = "BelgiumMaps.StatBel") +#' # province "West-Vlaanderen" (current polygon taken +#' # from the official WFS service): #' library(sf) -#' library(stringr) +#' library(purrr) +#' library(httr) #' mymask <- -#' st_as_sf(BE_ADMIN_PROVINCE) %>% -#' filter(str_detect(TX_PROV_DESCR_NL, "West")) %>% -#' st_transform(crs = 31370) +#' "https://geoservices.informatievlaanderen.be/overdrachtdiensten/VRBG/wfs" %>% +#' parse_url() %>% +#' list_merge(query = list(request = "GetFeature", +#' typeName = "VRBG:Refprv", +#' cql_filter="NAAM='West-Vlaanderen'", +#' srsName = "EPSG:31370", +#' outputFormat = "text/xml; subtype=gml/3.1.1")) %>% +#' build_url() %>% +#' read_sf(crs = 31370) %>% +#' st_cast("GEOMETRYCOLLECTION") #' get_locs(watina, #' loc_validity = "VLD", #' mask = mymask, #' buffer = 0) #' #' # Disconnect: -#' DBI::dbDisconnect(watina) +#' dbDisconnect(watina) #' } #' #' @export @@ -455,10 +475,7 @@ get_locs <- function(con, measuringref_ost = .data$ReferentieNiveauTAW, .data$tubelength, .data$filterlength, - .data$filterdepth) %>% - arrange(.data$area_code, - .data$loc_code, - .data$obswell_rank) + .data$filterdepth) if (filterdepth_guess) { locs <- @@ -493,12 +510,6 @@ get_locs <- function(con, ) } - locs <- - locs %>% - arrange(.data$area_code, - .data$loc_code, - .data$obswell_rank) - if (!obswells) { locs <- @@ -586,8 +597,9 @@ get_locs <- function(con, sql("CAST( filterdepth_guessed AS bit)")) } else .} %>% - filter(row_number() == 1L) %>% - ungroup() + ungroup() %>% + filter(.data$obswell_count == 1 | + .data$obswell_rank == .data$obswell_maxrank) ) %>% select(-.data$obswell_code, @@ -597,9 +609,7 @@ get_locs <- function(con, -.data$obswell_count, -.data$obswell_maxrank, -.data$obswell_maxrank_fd, - -.data$obswell_maxrank_sso) %>% - arrange(.data$area_code, - .data$loc_code) + -.data$obswell_maxrank_sso) } if (!is.null(mask)) { @@ -701,6 +711,18 @@ get_locs <- function(con, #' Why truncate, and why truncate by default? #' When to choose which \code{vert_crs}?) #' +#' @md +#' +#' @note +#' Up to and including `watina 0.3.0`, the result was sorted according to +#' `loc_code` and `hydroyear`, both for the lazy query and the +#' collected result. +#' Later versions avoid sorting in case of a lazy result, because +#' otherwise, when using the result inside another lazy query, this led to +#' 'ORDER BY' constructs in SQL subqueries, which must be avoided. +#' If you like to print the lazy object in a sorted manner, you must add +#' `%>% arrange(...)` yourself. +#' #' @param locs A \code{tbl_lazy} object or a dataframe, with at least a column #' \code{loc_code} that defines the locations for which values are to be #' returned. @@ -746,20 +768,25 @@ get_locs <- function(con, #' watina <- connect_watina() #' library(dplyr) #' mylocs <- get_locs(watina, area_codes = "KAL") -#' mylocs %>% get_xg3(watina, 2010) +#' mylocs %>% +#' get_xg3(watina, 2010) %>% +#' arrange(loc_code, hydroyear) #' mylocs %>% get_xg3(watina, 2010, collect = TRUE) -#' mylocs %>% get_xg3(watina, 2010, vert_crs = "ostend") +#' mylocs %>% +#' get_xg3(watina, 2010, vert_crs = "ostend") %>% +#' arrange(loc_code, hydroyear) #' #' # joining results to mylocs: #' mylocs %>% -#' get_xg3(watina, 2010) %>% -#' left_join(mylocs %>% -#' select(-loc_wid), -#' .) %>% -#' collect +#' get_xg3(watina, 2010) %>% +#' left_join(mylocs %>% +#' select(-loc_wid), +#' .) %>% +#' collect %>% +#' arrange(loc_code, hydroyear) #' #' # Disconnect: -#' DBI::dbDisconnect(watina) +#' dbDisconnect(watina) #' } #' #' @export @@ -853,13 +880,13 @@ get_xg3 <- function(locs, local = xg3 %>% select(-contains("ost")), ostend = xg3 %>% select(-contains("lcl")), both = xg3 - ) %>% - arrange(.data$loc_code, - .data$hydroyear) + ) if (collect) { xg3 <- xg3 %>% + arrange(.data$loc_code, + .data$hydroyear) %>% collect } @@ -906,6 +933,18 @@ get_xg3 <- function(locs, #' #' TO BE ADDED: What is electroneutrality and why is it used as a criterion? #' +#' @md +#' +#' @note +#' Up to and including `watina 0.3.0`, the result was sorted according to +#' `loc_code`, `date` and `chem_variable`, both for the lazy query and the +#' collected result. +#' Later versions avoid sorting in case of a lazy result, because +#' otherwise, when using the result inside another lazy query, this led to +#' 'ORDER BY' constructs in SQL subqueries, which must be avoided. +#' If you like to print the lazy object in a sorted manner, you must add +#' `%>% arrange(...)` yourself. +#' #' @param startdate First date of the timeframe, as a string. #' The string must use a formatting of the order 'day month year', #' i.e. a format which can be interpreted by \code{\link[lubridate:ymd]{dmy}}. @@ -987,9 +1026,14 @@ get_xg3 <- function(locs, #' watina <- connect_watina() #' library(dplyr) #' mylocs <- get_locs(watina, area_codes = "ZWA") -#' mylocs %>% get_chem(watina, "1/1/2017") -#' mylocs %>% get_chem(watina, "1/1/2017", collect = TRUE) -#' mylocs %>% get_chem(watina, "1/1/2017", conc_type = "eq") +#' mylocs %>% +#' get_chem(watina, "1/1/2017") %>% +#' arrange(loc_code, date, chem_variable) +#' mylocs %>% +#' get_chem(watina, "1/1/2017", collect = TRUE) +#' mylocs %>% +#' get_chem(watina, "1/1/2017", conc_type = "eq") %>% +#' arrange(loc_code, date, chem_variable) #' #' # compare the number of returned rows: #' mylocs %>% get_chem(watina, "1/1/2017") %>% count @@ -1009,10 +1053,11 @@ get_xg3 <- function(locs, #' left_join(mylocs %>% #' select(-loc_wid), #' .) %>% -#' collect +#' collect %>% +#' arrange(loc_code, date, chem_variable) #' #' # Disconnect: -#' DBI::dbDisconnect(watina) +#' dbDisconnect(watina) #' } #' #' @export @@ -1029,8 +1074,6 @@ get_xg3 <- function(locs, #' day #' month #' year -#' @importFrom dbplyr -#' db_pivot_wider #' @importFrom dplyr #' %>% #' copy_to @@ -1260,14 +1303,14 @@ get_chem <- function(locs, .data$unit)) ) %>% select(-contains("value_"), -.data$provide_eq_unit) %>% - mutate(unit = ifelse(.data$unit == "/", NA, .data$unit)) %>% - arrange(.data$loc_code, - .data$date, - .data$chem_variable) + mutate(unit = ifelse(.data$unit == "/", NA, .data$unit)) if (collect) { chem <- chem %>% + arrange(.data$loc_code, + .data$date, + .data$chem_variable) %>% collect } diff --git a/R/selectlocs.R b/R/selectlocs.R index 052c4fd..51734f1 100644 --- a/R/selectlocs.R +++ b/R/selectlocs.R @@ -252,7 +252,7 @@ #' mydata <- #' mylocs %>% #' get_xg3(watina, 2000) -#' mydata +#' mydata %>% arrange(loc_code, hydroyear) #' # Number of locations in mydata: #' mydata %>% distinct(loc_code) %>% count #' # Number of hydrological years per location and XG3 variable: @@ -290,7 +290,7 @@ #' result$combined_result_filtered #' result[2:4] #' # Disconnect: -#' DBI::dbDisconnect(watina) +#' dbDisconnect(watina) #' } #' #' @export @@ -553,10 +553,11 @@ selectlocs_xg3 <- function(data, filter(str_detect(.data$statistic, "ser_")), by = c("xg3_variable", "statistic")) %>% - complete(.data$loc_code, nesting(.data$xg3_variable, - .data$statistic, - .data$criterion, - .data$direction)) %>% + complete(.data$loc_code, with(.data, + nesting(xg3_variable, + statistic, + criterion, + direction))) %>% mutate(cond_met = ifelse(.data$direction == "min", .data$value >= .data$criterion, ifelse(.data$direction == "max", .data$value <= .data$criterion, @@ -875,7 +876,7 @@ selectlocs_xg3 <- function(data, #' mydata <- #' mylocs %>% #' get_chem(watina, "1/1/2010") -#' mydata +#' mydata %>% arrange(loc_code, date, chem_variable) #' mydata %>% #' pull(date) %>% #' lubridate::year(.) %>% @@ -924,7 +925,7 @@ selectlocs_xg3 <- function(data, #' conditions = conditions_df) #' #' # Disconnect: -#' DBI::dbDisconnect(watina) +#' dbDisconnect(watina) #' } #' #' @export @@ -1225,10 +1226,11 @@ selectlocs <- function(data, right_join(conditions, by = c("variable", "statistic")) %>% - complete(.data$loc_code, nesting(.data$variable, - .data$statistic, - .data$criterion, - .data$direction)) %>% + complete(.data$loc_code, with(.data, + nesting(variable, + statistic, + criterion, + direction))) %>% mutate(cond_met = ifelse(.data$direction == "min", .data$value >= .data$criterion, ifelse(.data$direction == "max", .data$value <= .data$criterion, diff --git a/R/sf.R b/R/sf.R index cef60a8..d79da8a 100644 --- a/R/sf.R +++ b/R/sf.R @@ -105,7 +105,6 @@ warn_xy_duplicates <- function(x, y) { assert_that(is.numeric(x)) assert_that(is.numeric(y)) assert_that(all.equal(length(x), length(y))) - assert_that(length(x) > 0) n_duplicated <- data.frame(x = x, y = y) %>% @@ -118,7 +117,7 @@ warn_xy_duplicates <- function(x, y) { warning("1 coordinate pair occurs more than once.") } else { warning(n_duplicated, - " different coordinate pairs occur more than once.") + " different coordinate pairs occur more than once.\n") } } diff --git a/_pkgdown.yml b/_pkgdown.yml index d75147c..954765f 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -1,6 +1,6 @@ template: params: - bootswatch: yeti + bootswatch: cosmo url: https://inbo.github.io/watina @@ -11,6 +11,7 @@ reference: - title: "Connect" contents: - connect_watina + - dbDisconnect - title: "Get locations" contents: - get_locs @@ -30,5 +31,3 @@ reference: - as_points - cluster_locs - extract_xg3_series - - collect - - convertdf_enc diff --git a/man/cluster_locs.Rd b/man/cluster_locs.Rd index d770fba..cef9be7 100644 --- a/man/cluster_locs.Rd +++ b/man/cluster_locs.Rd @@ -100,7 +100,7 @@ clusters \%>\% table # Disconnect: -DBI::dbDisconnect(watina) +dbDisconnect(watina) } } diff --git a/man/collect.Rd b/man/collect.Rd deleted file mode 100644 index 24da75c..0000000 --- a/man/collect.Rd +++ /dev/null @@ -1,28 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/encode_utf8.R -\name{collect} -\alias{collect} -\title{A variant of dplyr's collect() which converts dataframes to UTF-8 encoding -if OS is Windows} -\usage{ -collect(x) -} -\arguments{ -\item{x}{A tbl} -} -\description{ -Works as a simple \code{\link[dplyr:compute]{collect()}}, on which it is -based. -However, on a Windows OS the \code{collect()} function exported -by this package will convert character and factor -variables of dataframes to \code{UTF-8} encoding. -} -\details{ -The functions in this package that \emph{collect} a \code{tbl_lazy} object, -e.g. when \code{collect = TRUE}, do so by using this function. -As a convenience to the user, the function is exported to allow manual -implementation. - -The function \code{\link[=convertdf_enc]{convertdf_enc()}} is the workhorse -for the encoding conversion. -} diff --git a/man/connect_watina.Rd b/man/connect_watina.Rd index 9b137e3..4fa34f2 100644 --- a/man/connect_watina.Rd +++ b/man/connect_watina.Rd @@ -15,14 +15,14 @@ The function can only be used from within the INBO network. } \details{ Don't forget to disconnect at the end of your R-script using -\code{\link[DBI:dbDisconnect]{DBI::dbDisconnect()}}! +\code{\link{dbDisconnect}}! } \examples{ \dontrun{ watina <- connect_watina() # Do your stuff. # Disconnect: -DBI::dbDisconnect(watina) +dbDisconnect(watina) } } diff --git a/man/convertdf_enc.Rd b/man/convertdf_enc.Rd deleted file mode 100644 index 95dcba1..0000000 --- a/man/convertdf_enc.Rd +++ /dev/null @@ -1,34 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/encode_utf8.R -\name{convertdf_enc} -\alias{convertdf_enc} -\title{Convert encoding of character and factor variables in a dataframe} -\usage{ -convertdf_enc(x, from = "", to = "UTF-8", sub = NA) -} -\arguments{ -\item{x}{A dataframe or an object (such as \code{sf}) with the \code{data.frame} -class} - -\item{from}{A character string describing the current encoding.} - -\item{to}{A character string describing the target encoding.} - -\item{sub}{character string. If not \code{NA} it is used to replace - any non-convertible bytes in the input. (This would normally be a - single character, but can be more.) If \code{"byte"}, the indication is - \code{""} with the hex code of the byte.} -} -\value{ -The original object, with character variables (and levels of -(character) factor variables) converted to the specified encoding. -} -\description{ -Convert encoding of character and factor variables in a dataframe -} -\details{ -Encoding strings: all \code{R} platforms support \code{""} (for the -encoding of the current -locale), \code{"latin1"} and \code{"UTF-8"}. -See \code{\link[base]{iconv}} for more information. -} diff --git a/man/dbDisconnect.Rd b/man/dbDisconnect.Rd new file mode 100644 index 0000000..b3cad7b --- /dev/null +++ b/man/dbDisconnect.Rd @@ -0,0 +1,11 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/connect_watina.R +\name{dbDisconnect} +\alias{dbDisconnect} +\title{Disconnect a database connection} +\description{ +This is a re-export of +\code{\link[inbodb:dbDisconnect-OdbcConnection-method]{inbodb::dbDisconnect()}} +(\href{https://inbo.github.io/inbodb/reference/dbDisconnect-OdbcConnection-method.html}{url}). +} +\keyword{documentation} diff --git a/man/eval_chem.Rd b/man/eval_chem.Rd index d13514e..747053f 100644 --- a/man/eval_chem.Rd +++ b/man/eval_chem.Rd @@ -117,7 +117,7 @@ mylocs <- get_locs(watina, area_codes = "ZWA") mydata <- mylocs \%>\% get_chem(watina, "1/1/2010") -mydata +mydata \%>\% arrange(loc_code, date, chem_variable) mydata \%>\% pull(date) \%>\% lubridate::year(.) \%>\% @@ -137,7 +137,7 @@ mydata \%>\% arrange(desc(loc_code)) \%>\% select(loc_code, chem_variable, pval_uniform_totalspan) # Disconnect: -DBI::dbDisconnect(watina) +dbDisconnect(watina) } diff --git a/man/eval_xg3_avail.Rd b/man/eval_xg3_avail.Rd index 84c1962..0f42f71 100644 --- a/man/eval_xg3_avail.Rd +++ b/man/eval_xg3_avail.Rd @@ -45,11 +45,11 @@ mylocs <- get_locs(watina, area_codes = "KAL") mydata <- mylocs \%>\% get_xg3(watina, 2014) -mydata +mydata \%>\% arrange(loc_code, hydroyear) eval_xg3_avail(mydata, xg3_type = c("L", "V")) # Disconnect: -DBI::dbDisconnect(watina) +dbDisconnect(watina) } diff --git a/man/eval_xg3_series.Rd b/man/eval_xg3_series.Rd index 89f1a44..8ed3486 100644 --- a/man/eval_xg3_series.Rd +++ b/man/eval_xg3_series.Rd @@ -117,13 +117,13 @@ mylocs <- get_locs(watina, area_codes = "KAL") mydata <- mylocs \%>\% get_xg3(watina, 1900) -mydata +mydata \%>\% arrange(loc_code, hydroyear) mydata \%>\% eval_xg3_series(xg3_type = c("L", "V"), max_gap = 2, min_dur = 5) # Disconnect: -DBI::dbDisconnect(watina) +dbDisconnect(watina) } } diff --git a/man/extract_xg3_series.Rd b/man/extract_xg3_series.Rd index dde4b1b..96f18bc 100644 --- a/man/extract_xg3_series.Rd +++ b/man/extract_xg3_series.Rd @@ -82,13 +82,13 @@ mylocs <- get_locs(watina, area_codes = "KAL") mydata <- mylocs \%>\% get_xg3(watina, 1900) -mydata +mydata \%>\% arrange(loc_code, hydroyear) mydata \%>\% extract_xg3_series(xg3_type = c("L", "V"), max_gap = 2, min_dur = 5) # Disconnect: -DBI::dbDisconnect(watina) +dbDisconnect(watina) } } diff --git a/man/get_chem.Rd b/man/get_chem.Rd index c841e2a..ba65f12 100644 --- a/man/get_chem.Rd +++ b/man/get_chem.Rd @@ -136,14 +136,29 @@ To retrieve all data from all water samples, use \code{en_range = c(-1, 1)}. TO BE ADDED: What is electroneutrality and why is it used as a criterion? } +\note{ +Up to and including \verb{watina 0.3.0}, the result was sorted according to +\code{loc_code}, \code{date} and \code{chem_variable}, both for the lazy query and the +collected result. +Later versions avoid sorting in case of a lazy result, because +otherwise, when using the result inside another lazy query, this led to +'ORDER BY' constructs in SQL subqueries, which must be avoided. +If you like to print the lazy object in a sorted manner, you must add +\verb{\%>\% arrange(...)} yourself. +} \examples{ \dontrun{ watina <- connect_watina() library(dplyr) mylocs <- get_locs(watina, area_codes = "ZWA") -mylocs \%>\% get_chem(watina, "1/1/2017") -mylocs \%>\% get_chem(watina, "1/1/2017", collect = TRUE) -mylocs \%>\% get_chem(watina, "1/1/2017", conc_type = "eq") +mylocs \%>\% + get_chem(watina, "1/1/2017") \%>\% + arrange(loc_code, date, chem_variable) +mylocs \%>\% + get_chem(watina, "1/1/2017", collect = TRUE) +mylocs \%>\% + get_chem(watina, "1/1/2017", conc_type = "eq") \%>\% + arrange(loc_code, date, chem_variable) # compare the number of returned rows: mylocs \%>\% get_chem(watina, "1/1/2017") \%>\% count @@ -163,10 +178,11 @@ get_chem(watina, "1/1/2017") \%>\% left_join(mylocs \%>\% select(-loc_wid), .) \%>\% - collect + collect \%>\% + arrange(loc_code, date, chem_variable) # Disconnect: -DBI::dbDisconnect(watina) +dbDisconnect(watina) } } diff --git a/man/get_locs.Rd b/man/get_locs.Rd index 23a984a..45cfc1e 100644 --- a/man/get_locs.Rd +++ b/man/get_locs.Rd @@ -222,6 +222,16 @@ Here, the term 'observation well' is used to refer to a fixed installed device in the field (groundwater piezometer, surface water level measurement device). } +\note{ +Up to and including \verb{watina 0.3.0}, the result was sorted according to +\code{area_code} and \code{loc_code}, +both for the lazy query and the collected result. +Later versions avoid sorting in case of a lazy result, because +otherwise, when using the result inside another lazy query, this led to +'ORDER BY' constructs in SQL subqueries, which must be avoided. +If you like to print the lazy object in a sorted manner, you must add +\verb{\%>\% arrange(...)} yourself. +} \examples{ \dontrun{ watina <- connect_watina() @@ -232,7 +242,8 @@ get_locs(watina, bbox = c(xmin = 1.4e+5, xmax = 1.7e+5, ymin = 1.6e+5, - ymax = 1.9e+5)) + ymax = 1.9e+5)) \%>\% + arrange(area_code, loc_code) get_locs(watina, area_codes = c("KAL", "KBR"), @@ -295,22 +306,29 @@ get_locs(watina, head(12) # Selecting all piezometers with status VLD of the -# province "West-Vlaanderen": -data(BE_ADMIN_PROVINCE, - package = "BelgiumMaps.StatBel") +# province "West-Vlaanderen" (current polygon taken +# from the official WFS service): library(sf) -library(stringr) +library(purrr) +library(httr) mymask <- - st_as_sf(BE_ADMIN_PROVINCE) \%>\% - filter(str_detect(TX_PROV_DESCR_NL, "West")) \%>\% - st_transform(crs = 31370) + "https://geoservices.informatievlaanderen.be/overdrachtdiensten/VRBG/wfs" \%>\% + parse_url() \%>\% + list_merge(query = list(request = "GetFeature", + typeName = "VRBG:Refprv", + cql_filter="NAAM='West-Vlaanderen'", + srsName = "EPSG:31370", + outputFormat = "text/xml; subtype=gml/3.1.1")) \%>\% + build_url() \%>\% + read_sf(crs = 31370) \%>\% + st_cast("GEOMETRYCOLLECTION") get_locs(watina, loc_validity = "VLD", mask = mymask, buffer = 0) # Disconnect: -DBI::dbDisconnect(watina) +dbDisconnect(watina) } } diff --git a/man/get_xg3.Rd b/man/get_xg3.Rd index 6e81edb..249b698 100644 --- a/man/get_xg3.Rd +++ b/man/get_xg3.Rd @@ -86,25 +86,40 @@ Currently, non-truncated values are returned, with usage of estimated values. Why truncate, and why truncate by default? When to choose which \code{vert_crs}?) } +\note{ +Up to and including \verb{watina 0.3.0}, the result was sorted according to +\code{loc_code} and \code{hydroyear}, both for the lazy query and the +collected result. +Later versions avoid sorting in case of a lazy result, because +otherwise, when using the result inside another lazy query, this led to +'ORDER BY' constructs in SQL subqueries, which must be avoided. +If you like to print the lazy object in a sorted manner, you must add +\verb{\%>\% arrange(...)} yourself. +} \examples{ \dontrun{ watina <- connect_watina() library(dplyr) mylocs <- get_locs(watina, area_codes = "KAL") -mylocs \%>\% get_xg3(watina, 2010) +mylocs \%>\% + get_xg3(watina, 2010) \%>\% + arrange(loc_code, hydroyear) mylocs \%>\% get_xg3(watina, 2010, collect = TRUE) -mylocs \%>\% get_xg3(watina, 2010, vert_crs = "ostend") +mylocs \%>\% + get_xg3(watina, 2010, vert_crs = "ostend") \%>\% + arrange(loc_code, hydroyear) # joining results to mylocs: mylocs \%>\% - get_xg3(watina, 2010) \%>\% - left_join(mylocs \%>\% - select(-loc_wid), - .) \%>\% - collect + get_xg3(watina, 2010) \%>\% + left_join(mylocs \%>\% + select(-loc_wid), + .) \%>\% + collect \%>\% + arrange(loc_code, hydroyear) # Disconnect: -DBI::dbDisconnect(watina) +dbDisconnect(watina) } } diff --git a/man/selectlocs_chem.Rd b/man/selectlocs_chem.Rd index dea2e9a..c8e575c 100644 --- a/man/selectlocs_chem.Rd +++ b/man/selectlocs_chem.Rd @@ -183,7 +183,7 @@ mylocs <- get_locs(watina, area_codes = "ZWA") mydata <- mylocs \%>\% get_chem(watina, "1/1/2010") -mydata +mydata \%>\% arrange(loc_code, date, chem_variable) mydata \%>\% pull(date) \%>\% lubridate::year(.) \%>\% @@ -232,7 +232,7 @@ mydata \%>\% conditions = conditions_df) # Disconnect: -DBI::dbDisconnect(watina) +dbDisconnect(watina) } } diff --git a/man/selectlocs_xg3.Rd b/man/selectlocs_xg3.Rd index 023c954..c7fe217 100644 --- a/man/selectlocs_xg3.Rd +++ b/man/selectlocs_xg3.Rd @@ -279,7 +279,7 @@ mylocs <- get_locs(watina, mydata <- mylocs \%>\% get_xg3(watina, 2000) -mydata +mydata \%>\% arrange(loc_code, hydroyear) # Number of locations in mydata: mydata \%>\% distinct(loc_code) \%>\% count # Number of hydrological years per location and XG3 variable: @@ -317,7 +317,7 @@ result <- result$combined_result_filtered result[2:4] # Disconnect: -DBI::dbDisconnect(watina) +dbDisconnect(watina) } } diff --git a/pkgdown/extra.css b/pkgdown/extra.css new file mode 100644 index 0000000..fd261fe --- /dev/null +++ b/pkgdown/extra.css @@ -0,0 +1,13 @@ +body { + font-size: 18px; +} +pre { + font-size: 16px; +} +pre, code { + background-color: #fdfdfd; + color: #222; +} +code a, pre a { + color: #024b8e; +} diff --git a/vignettes/v010_getstarted.Rmd b/vignettes/v010_getstarted.Rmd index c1b640d..de3d402 100644 --- a/vignettes/v010_getstarted.Rmd +++ b/vignettes/v010_getstarted.Rmd @@ -20,7 +20,7 @@ library(stringr) library(knitr) ``` -_General note: the below vignette contains frozen output of 17 Sep 2019._ +_General note: the below vignette contains frozen output of 14 Jan 2021._ _This makes it possible to build the package with vignettes without access to the Watina database._ ## Connecting @@ -49,17 +49,29 @@ get_locs(watina) %>% collect %>% as.data.frame #> loc_wid loc_code area_code area_name x y loc_validitycode -#> 1 676 AABP011 AAB Aabeek 178331 209751 VLD -#> 2 677 AABP013 AAB Aabeek 178315 209637 VLD -#> 3 678 AABP014 AAB Aabeek 178310 209590 VLD -#> 4 681 AABP017 AAB Aabeek 178370 209461 VLD -#> 5 682 AABP018 AAB Aabeek 178379 209418 VLD -#> loc_validity loc_typecode loc_typename filterdepth soilsurf_ost -#> 1 Gevalideerd P Peilbuis/Piëzometer 2.90 10.34 -#> 2 Gevalideerd P Peilbuis/Piëzometer 2.71 10.34 -#> 3 Gevalideerd P Peilbuis/Piëzometer 2.74 9.41 -#> 4 Gevalideerd P Peilbuis/Piëzometer 2.72 9.31 -#> 5 Gevalideerd P Peilbuis/Piëzometer 2.58 9.50 +#> 1 45 AABP001 AAB Aabeek 177604 209714 VLD +#> 2 6291 AABP002 AAB Aabeek 177903 209459 VLD +#> 3 28 AABP003 AAB Aabeek 178353 209155 VLD +#> 4 29 AABP004 AAB Aabeek 179544 210259 VLD +#> 5 30 AABP005 AAB Aabeek 179610 210173 VLD +#> loc_validity loc_typecode loc_typename obswell_statecode +#> 1 Gevalideerd P Peilbuis/Piëzometer CONFN +#> 2 Gevalideerd P Peilbuis/Piëzometer CONFN +#> 3 Gevalideerd P Peilbuis/Piëzometer CONFN +#> 4 Gevalideerd P Peilbuis/Piëzometer CONFN +#> 5 Gevalideerd P Peilbuis/Piëzometer CONFN +#> obswell_state soilsurf_ost measuringref_ost tubelength filterlength +#> 1 OK - niet bemeten 10.136 10.656 4 2 +#> 2 OK - niet bemeten 9.590 10.120 4 2 +#> 3 OK - niet bemeten 9.660 10.150 4 2 +#> 4 OK - niet bemeten 10.310 10.730 4 2 +#> 5 OK - niet bemeten 10.480 10.950 4 2 +#> filterdepth +#> 1 2.48 +#> 2 2.47 +#> 3 2.51 +#> 4 2.58 +#> 5 2.53 ``` Selections can be done in a variety of ways -- and these ways can be combined: @@ -69,24 +81,24 @@ Selections can be done in a variety of ways -- and these ways can be combined: ```{r} get_locs(watina, area_codes = c("KAL", "KBR")) -#> # Source: lazy query [?? x 12] -#> # Database: Microsoft SQL Server -#> # Ordered by: area_code, loc_code, obswell_rank, area_code, loc_code +#> # Source: lazy query [?? x 17] +#> # Database: Microsoft SQL Server #> loc_wid loc_code area_code area_name x y loc_validitycode #> -#> 1 2824 KALP022 KAL Kalmthou… 155350 230813 VLD +#> 1 2824 KALP022 KAL Kalmthou… 155356 230811 VLD #> 2 1546 KALP030 KAL Kalmthou… 152990 238600 VLD -#> 3 10069 KALP034 KAL Kalmthou… 152962 231345 VLD -#> 4 1549 KALP035 KAL Kalmthou… 153000 236850 VLD -#> 5 1552 KALP039 KAL Kalmthou… 160940 235060 VLD -#> 6 4327 KALP050 KAL Kalmthou… 153881 233802 VLD -#> 7 6210 KALP051 KAL Kalmthou… 153822 233761 VLD -#> 8 1562 KALP052 KAL Kalmthou… 153822 233761 VLD -#> 9 1563 KALP053 KAL Kalmthou… 153781 233736 VLD -#> 10 6180 KALP054 KAL Kalmthou… 154689 232426 VLD -#> # … with more rows, and 5 more variables: loc_validity , -#> # loc_typecode , loc_typename , filterdepth , -#> # soilsurf_ost +#> 3 6205 KALP032 KAL Kalmthou… 159110 239010 VLD +#> 4 10069 KALP034 KAL Kalmthou… 152962 231345 VLD +#> 5 1549 KALP035 KAL Kalmthou… 153000 236850 VLD +#> 6 1552 KALP039 KAL Kalmthou… 160940 235060 VLD +#> 7 4327 KALP050 KAL Kalmthou… 153881 233802 VLD +#> 8 6210 KALP051 KAL Kalmthou… 153822 233761 VLD +#> 9 1562 KALP052 KAL Kalmthou… 153822 233761 VLD +#> 10 1563 KALP053 KAL Kalmthou… 153781 233736 VLD +#> # … with more rows, and 10 more variables: loc_validity , +#> # loc_typecode , loc_typename , obswell_statecode , +#> # obswell_state , soilsurf_ost , measuringref_ost , +#> # tubelength , filterlength , filterdepth ``` The number of involved locations is given by: @@ -99,7 +111,7 @@ get_locs(watina, #> # Database: Microsoft SQL Server #> n #> -#> 1 188 +#> 1 207 ``` @@ -114,9 +126,47 @@ get_locs(watina, #> # Database: Microsoft SQL Server #> n #> -#> 1 223 +#> 1 247 ``` +- for groundwater piezometers: by setting conditions on filterdepth. +The filterdepth is the depth of the piezometer's filter below soil surface (unit: m), more specifically referring to the position _halfway_ the length of the filter. +Conditions can be set by the arguments `filterdepth_range` and `filterdepth_na` (see documentation of `get_locs()`). +Moreover, by setting `filterdepth_guess = TRUE` (default `FALSE`) you can replace missing filterdepths by a conservative value, before applying these conditions. +By default, only filterdepths between 0 and 3 meters below soilsurface are returned and locations with missing filterdepth are dropped. + +Indeed, more records are returned when allowing groundwater piezometers with missing filterdepth: + +```{r} +get_locs(watina, + area_codes = c("KAL", "KBR"), + loc_type = c("P", "S"), + filterdepth_na = TRUE) %>% + count +#> # Source: lazy query [?? x 1] +#> # Database: Microsoft SQL Server +#> n +#> +#> 1 289 +``` + +Of course you can set further conditions for other variables, by building upon the lazy result of `get_locs()`, e.g.: + +```{r} +get_locs(watina, + area_codes = c("KAL", "KBR"), + loc_type = c("P", "S"), + filterdepth_na = TRUE) %>% + filter(obswell_state == "OK - niet bemeten") %>% + count +#> # Source: lazy query [?? x 1] +#> # Database: Microsoft SQL Server +#> n +#> +#> 1 157 +``` + + - by using a spatial bounding box (rectangular area), using the Belgian Lambert 72 coordinate reference system (EPSG-code [31370](https://epsg.io/31370)): ```{r} @@ -125,24 +175,24 @@ get_locs(watina, xmax = 1.7e+5, ymin = 1.6e+5, ymax = 1.9e+5)) -#> # Source: lazy query [?? x 12] -#> # Database: Microsoft SQL Server -#> # Ordered by: area_code, loc_code, obswell_rank, area_code, loc_code +#> # Source: lazy query [?? x 17] +#> # Database: Microsoft SQL Server #> loc_wid loc_code area_code area_name x y loc_validitycode #> #> 1 10261 ATGP002 ATG Antitank… 169572 186043 VLD -#> 2 5629 DORP016 DOR Dorent 158521 186074 VLD -#> 3 5630 DORP017 DOR Dorent 158442 185925 VLD -#> 4 5632 DORP019 DOR Dorent 158304 184915 VLD -#> 5 5634 DORP020 DOR Dorent 158352 184259 VLD -#> 6 5635 DORP021 DOR Dorent 158103 183853 VLD -#> 7 5586 DORP022 DOR Dorent 157715 184013 VLD -#> 8 5587 DORP023 DOR Dorent 154284 182533 VLD -#> 9 6420 DORP024 DOR Dorent 154823 182431 VLD -#> 10 5580 DORP027 DOR Dorent 155151 182190 VLD -#> # … with more rows, and 5 more variables: loc_validity , -#> # loc_typecode , loc_typename , filterdepth , -#> # soilsurf_ost +#> 2 777 BABP012 BAB Barebeek 160190 180988 VLD +#> 3 5629 DORP016 DOR Dorent 158521 186074 VLD +#> 4 5630 DORP017 DOR Dorent 158442 185925 VLD +#> 5 5632 DORP019 DOR Dorent 158304 184915 VLD +#> 6 5634 DORP020 DOR Dorent 158352 184259 VLD +#> 7 5635 DORP021 DOR Dorent 158103 183853 VLD +#> 8 5586 DORP022 DOR Dorent 157715 184013 VLD +#> 9 5587 DORP023 DOR Dorent 154284 182533 VLD +#> 10 6420 DORP024 DOR Dorent 154823 182431 VLD +#> # … with more rows, and 10 more variables: loc_validity , +#> # loc_typecode , loc_typename , obswell_statecode , +#> # obswell_state , soilsurf_ost , measuringref_ost , +#> # tubelength , filterlength , filterdepth ``` - if you already have specific location codes in mind, you can provide these directly as a vector: @@ -150,38 +200,44 @@ get_locs(watina, ```{r} get_locs(watina, loc_vec = c("KBRP081", "KBRP090", "KBRP095")) -#> # Source: lazy query [?? x 12] -#> # Database: Microsoft SQL Server -#> # Ordered by: area_code, loc_code, obswell_rank, area_code, loc_code +#> # Source: lazy query [?? x 17] +#> # Database: Microsoft SQL Server #> loc_wid loc_code area_code area_name x y loc_validitycode #> -#> 1 664 KBRP081 KBR Potpolde… 145846 204871 VLD -#> 2 698 KBRP090 KBR Potpolde… 145988 204702 VLD -#> 3 709 KBRP095 KBR Potpolde… 145949 204712 VLD -#> # … with 5 more variables: loc_validity , loc_typecode , -#> # loc_typename , filterdepth , soilsurf_ost +#> 1 664 KBRP081 KBR Polders … 145846 204871 VLD +#> 2 698 KBRP090 KBR Polders … 145988 204702 VLD +#> 3 709 KBRP095 KBR Polders … 145949 204712 VLD +#> # … with 10 more variables: loc_validity , loc_typecode , +#> # loc_typename , obswell_statecode , obswell_state , +#> # soilsurf_ost , measuringref_ost , tubelength , +#> # filterlength , filterdepth ``` - you can use a geospatial filter of class [sf](https://r-spatial.github.io/sf/) to make selections, by using the `mask` argument: ```{r echo=FALSE, include=FALSE} -data(BE_ADMIN_PROVINCE, - package = "BelgiumMaps.StatBel") -westfl <- - st_as_sf(BE_ADMIN_PROVINCE) %>% - filter(str_detect(TX_PROV_DESCR_NL, "West")) %>% - st_transform(crs = 31370) +westfl <- + "https://geoservices.informatievlaanderen.be/overdrachtdiensten/VRBG/wfs" %>% + httr::parse_url() %>% + purrr::list_merge(query = list(request = "GetFeature", + typeName = "VRBG:Refprv", + cql_filter="NAAM='West-Vlaanderen'", + srsName = "EPSG:31370", + outputFormat = "text/xml; subtype=gml/3.1.1")) %>% + httr::build_url() %>% + read_sf(crs = 31370) %>% + st_cast("GEOMETRYCOLLECTION") ``` ```{r} get_locs(watina, - mask = westfl, + mask = westfl, # sf polygon of the 'West-Vlaanderen' province buffer = 0) #> As a mask always invokes a collect(), the argument 'collect = FALSE' will be ignored. -#> Warning in get_locs(watina, mask = westfl, buffer = 0): Dropped 40 -#> locations from which x or y coordinates were missing. -#> # A tibble: 781 x 11 +#> Warning in get_locs(watina, mask = westfl, buffer = 0): Dropped 44 locations from which x or y coordinates were missing. +#> Warning in warn_xy_duplicates(locs$x, locs$y): 28 different coordinate pairs occur more than once. +#> # A tibble: 905 x 16 #> loc_code area_code area_name x y loc_validitycode loc_validity #> * #> 1 ASSP001 ASS Assebroe… 74324 208838 VLD Gevalideerd @@ -194,11 +250,14 @@ get_locs(watina, #> 8 ASSP008 ASS Assebroe… 73948 209104 VLD Gevalideerd #> 9 ASSP009 ASS Assebroe… 73613 208584 VLD Gevalideerd #> 10 ASSP010 ASS Assebroe… 73583 208390 VLD Gevalideerd -#> # … with 771 more rows, and 4 more variables: loc_typecode , -#> # loc_typename , filterdepth , soilsurf_ost +#> # … with 895 more rows, and 9 more variables: loc_typecode , +#> # loc_typename , obswell_statecode , obswell_state , +#> # soilsurf_ost , measuringref_ost , tubelength , +#> # filterlength , filterdepth ``` Note that the default value for `buffer` is 10 meters; it is used to enlarge mask (or shrink it, if `buffer` < 0). +Also, note there are a few warnings to inform about dropped locations and duplicated coordinates. Instead of leaving the output as a `tbl_lazy` object, you can also retrieve it as a tibble (i.e. an easier-to-use type of dataframe) by setting `collect = TRUE`. @@ -206,63 +265,76 @@ Instead of leaving the output as a `tbl_lazy` object, you can also retrieve it a get_locs(watina, area_codes = c("KAL", "KBR"), loc_type = c("P", "S"), - collect = TRUE - ) -#> # A tibble: 223 x 11 + collect = TRUE) +#> Warning in warn_xy_duplicates(locs$x, locs$y): 15 different coordinate pairs occur more than once. +#> # A tibble: 247 x 16 #> loc_code area_code area_name x y loc_validitycode loc_validity #> -#> 1 KBRP081 KBR Potpolde… 145846 204871 VLD Gevalideerd -#> 2 KBRP090 KBR Potpolde… 145988 204702 VLD Gevalideerd -#> 3 KBRP095 KBR Potpolde… 145949 204712 VLD Gevalideerd -#> 4 KBRS001 KBR Potpolde… 146937 205586 VLD Gevalideerd -#> 5 KALP030 KAL Kalmthou… 152990 238600 VLD Gevalideerd -#> 6 KALP035 KAL Kalmthou… 153000 236850 VLD Gevalideerd -#> 7 KALP039 KAL Kalmthou… 160940 235060 VLD Gevalideerd -#> 8 KALP052 KAL Kalmthou… 153822 233761 VLD Gevalideerd -#> 9 KALP053 KAL Kalmthou… 153781 233736 VLD Gevalideerd -#> 10 KALP059 KAL Kalmthou… 154754 232459 VLD Gevalideerd -#> # … with 213 more rows, and 4 more variables: loc_typecode , -#> # loc_typename , filterdepth , soilsurf_ost +#> 1 KALP022 KAL Kalmthou… 155356 230811 VLD Gevalideerd +#> 2 KALP030 KAL Kalmthou… 152990 238600 VLD Gevalideerd +#> 3 KALP032 KAL Kalmthou… 159110 239010 VLD Gevalideerd +#> 4 KALP034 KAL Kalmthou… 152962 231345 VLD Gevalideerd +#> 5 KALP035 KAL Kalmthou… 153000 236850 VLD Gevalideerd +#> 6 KALP039 KAL Kalmthou… 160940 235060 VLD Gevalideerd +#> 7 KALP050 KAL Kalmthou… 153881 233802 VLD Gevalideerd +#> 8 KALP051 KAL Kalmthou… 153822 233761 VLD Gevalideerd +#> 9 KALP052 KAL Kalmthou… 153822 233761 VLD Gevalideerd +#> 10 KALP053 KAL Kalmthou… 153781 233736 VLD Gevalideerd +#> # … with 237 more rows, and 9 more variables: loc_typecode , +#> # loc_typename , obswell_statecode , obswell_state , +#> # soilsurf_ost , measuringref_ost , tubelength , +#> # filterlength , filterdepth ``` Note that the lazy object contains a `loc_wid` column, which is an internal database variable to identify locations and make relations between tables. It should not be regarded as a stable ID; hence don't store it for later use (note that lazy objects don't store data). Consequently, the `loc_wid` column is omitted when using `collect = TRUE`. -Optionally, you can also retrieve the individual observation wells that are linked to each location (where each observation well is from a different timeframe): +Optionally, you can retrieve the individual **observation wells** that are linked to each location (where each observation well is from a different timeframe). +In this case, more attributes are returned (specific observation well attributes). +The below example also demonstrates the use of `tibble::glimpse()` to preview all columns: ```{r} get_locs(watina, obswells = TRUE, area_codes = c("KAL", "KBR"), - loc_type = c("P", "S")) -#> # Source: lazy query [?? x 14] -#> # Database: Microsoft SQL Server -#> # Ordered by: area_code, loc_code, obswell_rank -#> loc_wid loc_code area_code area_name x y loc_validitycode -#> -#> 1 2824 KALP022 KAL Kalmthou… 155350 230813 VLD -#> 2 1546 KALP030 KAL Kalmthou… 152990 238600 VLD -#> 3 10069 KALP034 KAL Kalmthou… 152962 231345 VLD -#> 4 1549 KALP035 KAL Kalmthou… 153000 236850 VLD -#> 5 1552 KALP039 KAL Kalmthou… 160940 235060 VLD -#> 6 4327 KALP050 KAL Kalmthou… 153881 233802 VLD -#> 7 6210 KALP051 KAL Kalmthou… 153822 233761 VLD -#> 8 1562 KALP052 KAL Kalmthou… 153822 233761 VLD -#> 9 1563 KALP053 KAL Kalmthou… 153781 233736 VLD -#> 10 6180 KALP054 KAL Kalmthou… 154689 232426 VLD -#> # … with more rows, and 7 more variables: loc_validity , -#> # loc_typecode , loc_typename , obswell_code , -#> # obswell_rank , filterdepth , soilsurf_ost + loc_type = c("P", "S")) %>% + glimpse() +#> Rows: ?? +#> Columns: 21 +#> $ loc_wid 1546, 1549, 1552, 1562, 1563, 1564, 1565, 1566, 1… +#> $ loc_code "KALP030", "KALP035", "KALP039", "KALP052", "KALP… +#> $ area_code "KAL", "KAL", "KAL", "KAL", "KAL", "KAL", "KAL", … +#> $ area_name "Kalmthoutse heide", "Kalmthoutse heide", "Kalmth… +#> $ x 152990, 153000, 160940, 153822, 153781, 154754, 1… +#> $ y 238600, 236850, 235060, 233761, 233736, 232459, 2… +#> $ loc_validitycode "VLD", "VLD", "VLD", "VLD", "VLD", "VLD", "VLD", … +#> $ loc_validity "Gevalideerd", "Gevalideerd", "Gevalideerd", "Gev… +#> $ loc_typecode "P", "P", "P", "P", "P", "P", "P", "P", "P", "P",… +#> $ loc_typename "Peilbuis/Piëzometer", "Peilbuis/Piëzometer", "Pe… +#> $ obswell_code "KALP030X", "KALP035X", "KALP039X", "KALP052X", "… +#> $ obswell_rank 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1… +#> $ obswell_statecode "CONFN", "CONFN", "CONFN", "CONFN", "CONFN", "CON… +#> $ obswell_state "OK - niet bemeten", "OK - niet bemeten", "OK - n… +#> $ obswell_installdate 1982-03-06, 1982-06-28, 1982-03-06, 1999-11-03, … +#> $ obswell_stopdate NA, NA, NA, NA, NA, NA, NA, NA, NA, 2020-05-10, … +#> $ soilsurf_ost 13.420, 15.200, 17.270, 21.060, 21.050, 20.540, 2… +#> $ measuringref_ost 13.580, 15.000, 17.150, 21.580, 21.490, 20.860, 2… +#> $ tubelength 3.12, 0.50, 2.26, 2.02, 2.04, 1.82, 3.25, 2.82, 2… +#> $ filterlength 1.00, 0.50, 1.00, 0.20, 0.30, 0.20, 0.20, 0.20, 0… +#> $ filterdepth 2.460, 0.450, 1.880, 1.400, 1.450, 1.400, 2.700, … ``` +With `get_locs(obswells = FALSE)`, which is the default, the observation well variables are aggregated for each location using one of four methods. +The method is set with the argument `obswell_aggr`. +The default, `obswell_aggr = "latest"`, returns the attributes of the most recent observation well that fulfills the `filterdepth_range` and `filterdepth_na` criteria. ## Disconnecting At the end of your work (a script, a bookdown document etc.), close the connection: ```{r} -DBI::dbDisconnect(watina) +dbDisconnect(watina) ``` diff --git a/vignettes/v110_xg3.Rmd b/vignettes/v110_xg3.Rmd index 86429d2..ac144af 100644 --- a/vignettes/v110_xg3.Rmd +++ b/vignettes/v110_xg3.Rmd @@ -25,7 +25,7 @@ _This makes it possible to build the package with vignettes without access to th ## Overview -XG3 is an umbrella for the three geohydrological variables LG3, HG3 and VG3, which are yearly statistics which can be calculated from series of water level measurements (on condition that data are frequent enough). +XG3 is an umbrella for the three geohydrological variables LG3, HG3 and VG3, which are yearly statistics which can be calculated from series of water level measurements (on condition that enough data are available). More explanation on these variables is given by the documentation of `get_xg3()`. Let's suppose that we want to select locations (within an area, i.e. after having used `get_locs()`) for which: @@ -195,6 +195,6 @@ It calls `eval_xg3_avail()` and `eval_xg3_series()` by itself, but alternatively ```{r include=FALSE} -DBI::dbDisconnect(watina) +dbDisconnect(watina) ``` diff --git a/vignettes/v210_chem.Rmd b/vignettes/v210_chem.Rmd index c01411e..b25c55c 100644 --- a/vignettes/v210_chem.Rmd +++ b/vignettes/v210_chem.Rmd @@ -313,6 +313,6 @@ The `selectlocs_chem()` function, of which we saw a demonstration above, calls ` Alternatively the user can provide the result of those functions as input to `selectlocs_chem()`. ```{r include=FALSE} -DBI::dbDisconnect(watina) +dbDisconnect(watina) ```