From 3d10e604bacda2dc050c7542b4b5a0da21a60bd4 Mon Sep 17 00:00:00 2001 From: dlizcano Date: Wed, 27 Mar 2024 01:40:26 -0500 Subject: [PATCH] update new funcion --- .gitignore | 2 +- NAMESPACE | 1 + R/distribution.R | 38 +++++++ R/get_mammalcol.R | 6 +- R/mammalcol24.R | 2 +- R/mammalmap.R | 8 +- R/sp_by_depto.R | 54 ++++++++++ README.Rmd | 8 +- README.md | 2 +- docs/index.html | 2 +- docs/pkgdown.yml | 2 +- docs/reference/distribution.html | 163 +++++++++++++++++++++++++++++++ docs/reference/index.html | 8 ++ docs/reference/sp_by_depto.html | 146 +++++++++++++++++++++++++++ docs/reference/taxon.html | 4 +- docs/sitemap.xml | 6 ++ man/distribution.Rd | 46 +++++++++ man/sp_by_depto.Rd | 38 +++++++ man/taxon.Rd | 2 +- 19 files changed, 521 insertions(+), 17 deletions(-) create mode 100644 R/distribution.R create mode 100644 R/sp_by_depto.R create mode 100644 docs/reference/distribution.html create mode 100644 docs/reference/sp_by_depto.html create mode 100644 man/distribution.Rd create mode 100644 man/sp_by_depto.Rd diff --git a/.gitignore b/.gitignore index 0fc4bfa..a68c5e7 100644 --- a/.gitignore +++ b/.gitignore @@ -2,4 +2,4 @@ .Rhistory .RData .Ruserdata - +.travis.yml diff --git a/NAMESPACE b/NAMESPACE index c76868f..62f5bca 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -2,3 +2,4 @@ export(mammalmap) export(search_mammalcol) +export(sp_by_depto) diff --git a/R/distribution.R b/R/distribution.R new file mode 100644 index 0000000..e5573ec --- /dev/null +++ b/R/distribution.R @@ -0,0 +1,38 @@ +#' distribution Dataset +#' +#' The distribution dataset comprises a data frame containing information +#' regarding mammal species distribution documented in Colombia, sourced from the "List of the +#' mammals of Colombia" compiled by Ramírez-Chaves 2021. It encompasses various +#' details, including locality, source, threat status and appendix CITES of each species. 
+#' The table is a variant of the distribution table from https://doi.org/10.15472/kl1whs +#' +#' @format A data frame with 548 rows and 8 columns: +#' \describe{ +#' \item{id}{id from original taxon table.} +#' \item{locality}{Departamento where the mammal is present.} +#' \item{countryCode}{Code for Colombia.} +#' \item{establishmentMeans}{Is endemic? Endémica=Yes} +#' \item{threatStatus}{categorization previous to 2021} +#' \item{appendixCITES}{Appendix from CITES} +#' \item{source}{reference for distribution} +#' \item{occurrenceRemarks}{region where the species occurs.} +#' } +#' +#' +#' @details This dataset is designed to provide users with comprehensive +#' information about the mammal species found in Colombia, as documented +#' by Ramírez-Chaves 2021. It is organized for easy access and utilization within +#' the R environment. +#' +#' @seealso +#' For more information about the "mammalcol" package and the data sources, visit +#' the package's GitHub repository: \url{https://github.com/dlizcano/mammalcol} +#' +#' @references +#' The dataset is based on the "List of the Mammals of Colombia" by Ramírez-Chaves 2021. +#' +#' @author +#' Data compilation: Ramírez-Chaves 2021, Package implementation: Diego J. 
Lizcano +#' +#' @keywords dataset +"distribution" diff --git a/R/get_mammalcol.R b/R/get_mammalcol.R index 26f3094..658b70e 100644 --- a/R/get_mammalcol.R +++ b/R/get_mammalcol.R @@ -38,7 +38,7 @@ search_mammalcol <- function(splist, max_distance = 0.2) { splist_std <- unique(splist_st) # create an output data container - output_matrix <- matrix(nrow = length(splist_std), ncol = 20) # two more + output_matrix <- matrix(nrow = length(splist_std), ncol = 21) # two more colnames(output_matrix) <- c("name_submitted", names(taxon), "Distance") @@ -61,7 +61,7 @@ search_mammalcol <- function(splist, max_distance = 0.2) { # check non matching result if (length(matches) == 0) { - row_data <- rep("nill", 18) # number of columns + row_data <- rep("nill", 19) # number of columns } else if (length(matches) != 0){ # match result dis_value <- as.numeric(utils::adist(splist_std[i], matches)) @@ -69,7 +69,7 @@ search_mammalcol <- function(splist, max_distance = 0.2) { dis_val_1 <- dis_value[dis_value <= max_distance_fixed] if (length(matches1) == 0){ - row_data <- rep("nill", 18) # number of columns + row_data <- rep("nill", 19) # number of columns } else if (length(matches1) != 0){ row_data <- as.matrix(taxon[taxon$scientificName %in% matches1,]) diff --git a/R/mammalcol24.R b/R/mammalcol24.R index 31d3a51..9f3cf0c 100644 --- a/R/mammalcol24.R +++ b/R/mammalcol24.R @@ -2,7 +2,7 @@ #' #' The taxon dataset comprises a tibble containing information #' regarding mammal species documented in Colombia, sourced from the "List of the -#' mammals of Peru" compiled by Ramírez-Chaves 2021. It encompasses various +#' mammals of Colombia" compiled by Ramírez-Chaves 2021. It encompasses various #' details, including scientific names, English names, Spanish names, order, #' family, threat status and distribution of each species. 
The table is a #' variant of the taxon table from https://doi.org/10.15472/kl1whs diff --git a/R/mammalmap.R b/R/mammalmap.R index ac0157c..278c7b0 100644 --- a/R/mammalmap.R +++ b/R/mammalmap.R @@ -18,8 +18,8 @@ #' species <- "Tapirus pinchaque" #' mammalmap(species) #' -#' @importFrom rlang taxon -#' @importFrom rlang colmap +#' @importFrom mammalcol taxon +#' @importFrom mammalcol colmap #' @export mammalmap <- function(species){ @@ -28,6 +28,10 @@ mammalmap <- function(species){ if (!requireNamespace("sf", quietly = TRUE)) install.packages("sf") + if (!is.character(species)) { + stop(paste0("Argument species must be a character, not ", class(species)[1])) + } + # require("ggplot2") # require("sf") diff --git a/R/sp_by_depto.R b/R/sp_by_depto.R new file mode 100644 index 0000000..771058f --- /dev/null +++ b/R/sp_by_depto.R @@ -0,0 +1,54 @@ +#' Mammal occurrence by departamento +#' +#' Find the mammal species that occur in a given departamento of Colombia +#' +#' @param states a character vector with one or more departamento names +#' @param type type of matching to be used. \code{any} will return the mammal species that +#' occur in any of the passed \code{states}. \code{only} matches mammal species that +#' occur only in all provided (no more, no less) \code{states} and \code{all} matches +#' mammal species that occur at least in all \code{states} passed. See examples. +#' @param taxa optional character vector to match against the departamentos. 
Use the order name +#' @export +#' @return a data frame with the columns scientificName, family, order and locality, or \code{NA} when no record of \code{distribution} matches \code{states} +#' @examples +#' \dontrun{ +#' occ.any <- sp_by_depto(c("Arauca", "Norte de Santander"), type = "any") +#' occ.only <- sp_by_depto(c("Norte de Santander"), type = "only") +#' occ.all <- sp_by_depto(c("Arauca", "Norte de Santander"), type = "all") +#' occ.taxa <- sp_by_depto(c("Arauca", "Norte de Santander"), type = "all", taxa = "Chiroptera") +#' +#' head(occ.any) +#' head(occ.only) +#' head(occ.all) +#' head(occ.taxa) +#' } +#' +sp_by_depto <- function(states, type = c("any", "only", "all"), taxa = NULL) { + if (length(states) == 0) stop("Please provide at least one Colombian Departamento") + type <- match.arg(type) + states <- sort(states) + # states <- paste("BR-", states, sep = "") + if (length(states) == 0) stop("Please provide at least one Colombian Departamento") + # res <- lapply(occurrences, match, states) + if (type == "any") { + #res <- lapply(res, function(x) any(!is.na(x))) + res <- subset(distribution, grepl(paste(states, collapse = "|"), locality)) + } + if (type == "only") { + res <- subset(distribution, grepl(paste("^", paste(states, collapse = "\\|"), "$", sep = ""), locality)) + } + if (type == "all") { + res <- subset(distribution, grepl(paste(states, collapse = ".*"), locality)) + } + # res <- distribution[unlist(res), ] + if (nrow(res) == 0) { + return(NA) + } + if (is.null(taxa)) { + merge(taxon[, c("scientificName", "family", "order", "id")], res[, c("id", "locality")], by = "id")[,-1] + # removes id + } else { + merge(taxon[taxon$order %in% taxa, c("scientificName", "family", "order", "id" )], res[, c("id", "locality")], by = "id")[,-1] + # removes id + } +} diff --git a/README.Rmd b/README.Rmd index e24eae4..88659dc 100644 --- a/README.Rmd +++ b/README.Rmd @@ -33,7 +33,7 @@ search_mammalcol <- function(splist, max_distance = 0.2) { splist_std <- unique(splist_st) # create an output data container - output_matrix <- matrix(nrow = length(splist_std), ncol = 20) # two more + 
output_matrix <- matrix(nrow = length(splist_std), ncol = 21) # two more colnames(output_matrix) <- c("name_submitted", names(taxon), "Distance") @@ -56,7 +56,7 @@ search_mammalcol <- function(splist, max_distance = 0.2) { # check non matching result if (length(matches) == 0) { - row_data <- rep("nill", 18) # number of columns + row_data <- rep("nill", 19) # number of columns } else if (length(matches) != 0){ # match result dis_value <- as.numeric(utils::adist(splist_std[i], matches)) @@ -64,7 +64,7 @@ search_mammalcol <- function(splist, max_distance = 0.2) { dis_val_1 <- dis_value[dis_value <= max_distance_fixed] if (length(matches1) == 0){ - row_data <- rep("nill", 18) # number of columns + row_data <- rep("nill", 19) # number of columns } else if (length(matches1) != 0){ row_data <- as.matrix(taxon[taxon$scientificName %in% matches1,]) @@ -223,7 +223,7 @@ search_mammalcol(splist) ``` -#### Search database whith typos +#### Search database with typos `mammalcol` has the ability to identify and correct minor typos and lower case in the genus. Correcting potential typos or variations in species names through fuzzy matching is a crucial aspect of data management. This technique ensures precise retrieval by adeptly identifying and accommodating minor differences in input names, thus enhancing the reliability of analyses conducted on diverse, inconsistent, and ensemble datasets. diff --git a/README.md b/README.md index 2f5a5a3..0c07ff2 100644 --- a/README.md +++ b/README.md @@ -82,7 +82,7 @@ search_mammalcol(splist) #> 4 Northern Pudu 17 ``` -#### Search database whith typos +#### Search database with typos `mammalcol` has the ability to identify and correct minor typos and lower case in the genus. Correcting potential typos or variations in diff --git a/docs/index.html b/docs/index.html index 5483696..36c96ad 100644 --- a/docs/index.html +++ b/docs/index.html @@ -175,7 +175,7 @@

Search database#> 3 Lowland Tapir 16 #> 4 Northern Pudu 17
-

Search database whith typos +

Search database with typos

mammalcol has the ability to identify and correct minor typos and lower case in the genus. Correcting potential typos or variations in species names through fuzzy matching is a crucial aspect of data management. This technique ensures precise retrieval by adeptly identifying and accommodating minor differences in input names, thus enhancing the reliability of analyses conducted on diverse, inconsistent, and ensemble datasets.

diff --git a/docs/pkgdown.yml b/docs/pkgdown.yml
index 28ead3a..b47769a 100644
--- a/docs/pkgdown.yml
+++ b/docs/pkgdown.yml
@@ -2,7 +2,7 @@ pandoc: 3.1.1
 pkgdown: 2.0.7
 pkgdown_sha: ~
 articles: {}
-last_built: 2024-03-26T05:55Z
+last_built: 2024-03-27T06:37Z
 urls:
   reference: https://dlizcano.github.io/mammalcol/reference
   article: https://dlizcano.github.io/mammalcol/articles
diff --git a/docs/reference/distribution.html b/docs/reference/distribution.html
new file mode 100644
index 0000000..5ca8077
--- /dev/null
+++ b/docs/reference/distribution.html
@@ -0,0 +1,163 @@
+
+distribution Dataset — distribution • mammalcol
+    
+
+    
+
+ + + +
+
+ + +
+

The distribution dataset comprises a data frame containing information +regarding mammal species distribution documented in Colombia, sourced from the "List of the +mammals of Colombia" compiled by Ramírez-Chaves 2021. It encompasses various +details, including locality, source, threat status and appendix CITES of each species. +The table is a variant of the distribution table from https://doi.org/10.15472/kl1whs

+
+ +
+
distribution
+
+ +
+

Format

+

A data frame with 548 rows and 8 columns:

id
+

id from original taxon table.

+ +
locality
+

Departamento where the mammal is present.

+ +
countryCode
+

Code for Colombia.

+ +
establishmentMeans
+

Is endemic? Endémica=Yes

+ +
threatStatus
+

categorization previous to 2021

+ +
appendixCITES
+

Appendix from CITES

+ +
source
+

reference for distribution

+ +
occurrenceRemarks
+

region where the species occurs.

+ + +
+
+

Details

+

This dataset is designed to provide users with comprehensive +information about the mammal species found in Colombia, as documented +by Ramírez-Chaves 2021. It is organized for easy access and utilization within +the R environment.

+
+
+

References

+

The dataset is based on the "List of the Mammals of Colombia" by Ramírez-Chaves 2021.

+
+
+

See also

+

For more information about the "mammalcol" package and the data sources, visit +the package's GitHub repository: https://github.com/dlizcano/mammalcol

+
+
+

Author

+

Data compilation: Ramírez-Chaves 2021, Package implementation: Diego J. Lizcano

+
+ +
+ +
+ + +
+ +
+

Site built with pkgdown 2.0.7.

+
+ +
+ + + + + + + + diff --git a/docs/reference/index.html b/docs/reference/index.html index 3f530b1..08691f8 100644 --- a/docs/reference/index.html +++ b/docs/reference/index.html @@ -78,6 +78,10 @@

All functions colmap

colmap Dataset

+ +

distribution

+ +

distribution Dataset

mammalmap()

@@ -86,6 +90,10 @@

All functions search_mammalcol()

Retrieve Data from the List of the Mammals of Colombia

+ +

sp_by_depto()

+ +

Mammal occurrence by departamento

taxon

diff --git a/docs/reference/sp_by_depto.html b/docs/reference/sp_by_depto.html new file mode 100644 index 0000000..c3a2538 --- /dev/null +++ b/docs/reference/sp_by_depto.html @@ -0,0 +1,146 @@ + +Mammal occurrence by departamento — sp_by_depto • mammalcol + + +
+
+ + + +
+
+ + +
+

Find the mammal species that occur in a given departamento of Colombia

+
+ +
+
sp_by_depto(states, type = c("any", "only", "all"), taxa = NULL)
+
+ +
+

Arguments

+
states
+

a character vector with one or more departamento names

+ + +
type
+

type of matching to be used. any will return the mammal species that +occur in any of the passed states. only matches mammal species that +occur only in all provided (no more, no less) states and all matches +mammal species that occur at least in all states passed. See examples.

+ + +
taxa
+

optional character vector to match against the departamentos. Use the order name

+ +
+
+

Value

+ + +

a data frame

+
+ +
+

Examples

+
if (FALSE) {
+occ.any <- sp_by_depto(c("Arauca", "Norte de Santander"), type = "any")
+occ.only <- sp_by_depto(c("Norte de Santander"), type = "only")
+occ.all <- sp_by_depto(c("Arauca", "Norte de Santander"), type = "all")
+occ.taxa <- sp_by_depto(c("Arauca", "Norte de Santander"), type = "all", taxa = "Chiroptera")
+
+head(occ.any)
+head(occ.only)
+head(occ.all)
+head(occ.taxa)
+}
+
+
+
+
+ +
+ + +
+ +
+

Site built with pkgdown 2.0.7.

+
+ +
+ + + + + + + + diff --git a/docs/reference/taxon.html b/docs/reference/taxon.html index cdbfd50..ed6fd96 100644 --- a/docs/reference/taxon.html +++ b/docs/reference/taxon.html @@ -1,7 +1,7 @@ taxon Dataset — taxon • mammalcol