Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for ArcGIS item and REST service URL validation and checks #31

Open
elipousson opened this issue Feb 23, 2024 · 0 comments
Open

Comments

@elipousson
Copy link

For validation and handling of input URLs within {arcgislayers} and this package, it would be helpful to have a reliable set of url validation and checking functions.

This reprex is a first attempt at adapting and updating the existing URL checking and validation code from {esri2sf} for reuse in this package.

library(arcgisutils)
library(rlang)
#> 
#> Attaching package: 'rlang'
#> The following object is masked from 'package:arcgisutils':
#> 
#>     %||%

# Helper for setting URL pattern
set_url_pattern <- function(pattern = NULL, nm = NULL, collapse = "|") {
  if (is.null(pattern)) {
    return(pattern)
  }

  if (!is.null(nm)) {
    nm <- match.arg(nm, names(pattern))
    pattern <- pattern[nm]
  }

  paste0(pattern, collapse = collapse)
}

# Does x match the pattern of a URL?
# @noRd
is_url <- function(
    x,
    pattern = NULL,
    ...) {
  if (!is_vector(x) || is_empty(x)) {
    return(FALSE)
  }

  url_pattern <-
    "http[s]?://(?:[[:alnum:]]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+"

  if (is.null(pattern)) {
    return(grepl(url_pattern, x, ...))
  }

  pattern <- paste0(pattern, collapse = "|")

  grepl(url_pattern, x, ...) & grepl(pattern, x, ...)
}



# Check if x is a valid URL
# @noRd
check_url <- function(
    x,
    pattern = NULL,
    ...,
    allow_null = FALSE,
    message = NULL,
    arg = caller_arg(url),
    call = caller_env()) {
  if (allow_null && is.null(x)) {
    return(invisible(NULL))
  }

  if (is_url(x, pattern = pattern, ...)) {
    return(invisible(NULL))
  }

  check_string(
    x,
    allow_empty = FALSE,
    allow_null = allow_null,
    arg = arg,
    call = call
  )

  if (is.null(message)) {
    message <- "{.arg {arg}} must be a valid url,
    not {.obj_type_friendly {x}}."

    if (!is.null(pattern)) {
      message <- "{.arg {arg}} must be a valid url
    matching the pattern {.str {pattern}}"
    }
  }

  cli::cli_abort(
    message,
    call = call
  )
}

# Check if x is a string
# @noRd
check_string <- function(
    x,
    allow_empty = TRUE,
    allow_null = FALSE,
    arg = caller_arg(x),
    call = caller_env()
) {

  if (allow_null && is.null(x)) {
    return(invisible(NULL))
  }

  message <- "{.arg {arg}} must be a scalar character vector."

  if (is_scalar_character(x)) {
    if (allow_empty || x != "") {
      return(invisible(NULL))
    }

    message <- '{.arg {arg}} must be a non-empty string.'
  }

  cli::cli_abort(
    message,
    call = call
  )
}

# Vector of valid ESRI service types
esri_service_types <- c(
  "MapServer", "FeatureServer", "ImageServer", "GeoDataServer",
  "GeocodeServer", "GeometryServer", "GPServer", "WFSServer", "WFCServer"
)

# Named vector of ESRI service types
esri_service_type_patterns <- setNames(
  paste0("(?<=/rest/services).+/", esri_service_types, "/"),
  esri_service_types
)

# Vector of valid ESRI service types
esri_layer_types <- c(
  "FeatureLayer", "Table", "GroupLayer"
)

# Named vector of ESRI Item URL patterns
esri_item_url_patterns <- c(
  "content" = "/home/content\\.html\\?view=",
  "search" = "/home/search\\.html",
  "item" = "/home/item\\.html\\?id=",
  "group" = "/home/group\\.html\\?id=",
  "user" = "/home/user\\.html\\?user=",
  "scene" = "/home/webscene/viewer\\.html\\?webscene=",
  "app" = "/index\\.html\\?appid=",
  "notebook" = "/notebook/notebook\\.html\\?rid=",
  "experience" = "/experience/"
)

# Named vector of ESRI URL patterns for data services and items
esri_service_url_patterns <- c(
  "root" = "/rest/services/?$",
  "service" = paste0("(?<=/rest/services)",
                     "(", paste0(esri_service_types, collapse = "|"), ")/?$"),
  "layer" = paste0("(?<=/rest/services)",
                     "(", paste0(esri_service_types, collapse = "|"), ")",
                     "/[[:digit:]]+/?$"),
  esri_service_type_patterns
)

# Named vector of messages for what defines a valid URL of the specified type
valid_esri_url_messages <- c(
  "root" = "A {.val {type}} URL must end in {.str /rest/services}.",
  "folder" = "A {.val {type}} URL must be a 'Folder' endpoint",
  "service" = "A {.val {type}} URL must end in one of the supported service types ({esri_service_types})",
  "feature" = "A {.val {type}} URL must end in a feature ID",
  "content" = "A {.val {type}} URL must include the text {.str /content/}.",
  "search" = "A {.val {type}} URL must include the text {.str /home/search.html?q=}.",
  "item" = "An {.val {type}} URL must include the text {.str /home/item.html?id=}.",
  "group" = "A {.val {type}} URL must include the text {.str /home/group.html?id=}.",
  "user" = "A {.val {type}} URL must include the text {.str /home/user.html?user=}.",
  "scene" = "A {.val {type}} URL must include the text {.str /home/webscene/viewer.html?webscene=}.",
  "app" = "An {.val {type}} URL must include the text {.str /index.html?appid=}.",
  "notebook" = "An {.val {type}} URL must include the text {.str /notebook/notebook.html?rid=}.",
  "experience" = "An {.val {type}} URL must include the text {.str /experience/}.",
  setNames(
    paste0("A {.val {type}} URL must include the text {.str /", esri_service_types, "/} after {.str /rest/services/}"),
    esri_service_types
  )
)

# Is a character vector an ESRI service or item URL?
# @name is_esri_url
NULL
#> NULL

# - [is_esri_services_url()]: Does x match the pattern of a ESRI Item URL?
# @name is_esri_item_url
# @rdname is_esri_url
# @export
is_esri_item_url <- function(x, type = NULL) {
  is_url(
    x,
    pattern = set_url_pattern(esri_item_url_patterns, nm = type),
    perl = TRUE)
}

# - [is_esri_services_url()] Does x match the pattern of a ESRI Service URL?
# @name is_esri_services_url
# @rdname is_esri_url
# @export
is_esri_services_url <- function(x, type = NULL) {
  is_url(
    x,
    pattern = set_url_pattern(esri_service_url_patterns, nm = type),
    perl = TRUE)
}

# - [is_esri_folder_url()]: Is x a folder URL?
# @name is_esri_folder_url
# @rdname is_esri_url
# @export
is_esri_folder_url <- function(x, metadata = NULL, ..., call = caller_env()) {
  if (is.null(metadata)) {
    check_string(x, call = call)
    metadata <- fetch_layer_metadata(httr2::request(x), ..., error_call = call)
  }

  # FIXME: This is carried over from esri2sf but needs to be double-checked
  is_url(x, pattern = "/rest/services") & ("folders" %in% names(metadata))
}

check_esri_url <- function(x,
                           type = "service",
                           values = NULL,
                           allow_null = FALSE,
                           arg = caller_arg(x),
                           call = caller_env()) {
  if (allow_null && is.null(x)) {
    return(invisible(NULL))
  }

  check_url(x, arg = arg, call = call)

  if (is.null(values)) {
    values <- names(
      c(esri_item_url_patterns,
        esri_service_url_patterns,
        esri_service_type_patterns)
      )
  }

  type <- arg_match(type, values = values)

  if (type %in% names(esri_service_url_patterns)) {
    pattern <- esri_service_url_patterns
  } else if (type %in% names(esri_item_url_patterns)) {
    pattern <- esri_item_url_patterns
  } else if (type %in% names(esri_service_type_patterns)) {
    pattern <- esri_service_type_patterns
  }

  pattern <- set_url_pattern(pattern, type)

  if (is_url(x, pattern = pattern, perl = TRUE)) {
    return(invisible(NULL))
  }

  info_message <- valid_esri_url_messages[type]

  info_message <- set_names(
    info_message,
    rep_len("i", length(info_message))
  )

  cli::cli_abort(
    c("{.arg {arg}} must be a valid {.val {type}} url.",
      validation_messages),
    call = call
  )
}

# Examples

item_url <- "https://www.arcgis.com/home/item.html?id=ebeb65deb5c14f4d8849fd68944b7ee6"
search_url <- "https://www.arcgis.com/home/search.html?restrict=false&sortField=relevance&sortOrder=desc&searchTerm=census#content"
root_url <- "https://services.arcgis.com/P3ePLMYs2RVChkJx/arcgis/rest/services"
server_url <- "https://services.arcgis.com/P3ePLMYs2RVChkJx/arcgis/rest/services/USA_Census_2020_DHC_Total_Population/FeatureServer/"
layer_url <- "https://services.arcgis.com/P3ePLMYs2RVChkJx/ArcGIS/rest/services/USA_Census_2020_DHC_Total_Population/FeatureServer/3"

is_esri_item_url(item_url)
#> [1] TRUE

is_esri_item_url(item_url, "search")
#> [1] FALSE

is_esri_item_url(search_url)
#> [1] TRUE

is_esri_item_url(search_url, "item")
#> [1] FALSE

is_esri_services_url(root_url)
#> [1] TRUE

is_esri_services_url(server_url)
#> [1] TRUE

is_esri_services_url(server_url, "root")
#> [1] FALSE

is_esri_services_url(layer_url)
#> [1] TRUE

is_esri_services_url(layer_url, "ImageServer")
#> [1] FALSE

Created on 2024-02-23 with reprex v2.1.0

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

1 participant