update new funcion

dlizcano · Mar 27, 2024 · 3d10e60 · 3d10e60
1 parent c09e7b1
commit 3d10e60
Show file tree

Hide file tree

Showing 19 changed files with 521 additions and 17 deletions.
diff --git a/.gitignore b/.gitignore
@@ -2,4 +2,4 @@
 .Rhistory
 .RData
 .Ruserdata
-
+.travis.yml
diff --git a/NAMESPACE b/NAMESPACE
@@ -2,3 +2,4 @@
 
 export(mammalmap)
 export(search_mammalcol)
+export(sp_by_depto)
diff --git a/R/distribution.R b/R/distribution.R
@@ -0,0 +1,38 @@
+#' distribution Dataset
+#'
+#' The distribution dataset comprises a data frame containing information
+#' regarding mammal species distribution documented in Colombia, sourced from the "List of the
+#' mammals of Colombia" compiled by Ramírez-Chaves 2021. It encompasses various
+#' details, including locality, source, threat status and appendix CITES of each species. 
+#' The table is a variant of the distribution table from https://doi.org/10.15472/kl1whs
+#'
+#' @format A data frame with 548 rows and 8 columns:
+#'   \describe{
+#'     \item{id}{id from original taxon table.}
+#'     \item{locality}{Departamento were the mammal is present.}
+#'     \item{countryCode}{Code for Colombia.}
+#'     \item{establishmentMeans}{Is endemic?. Endémica=Yes}
+#'     \item{threatStatus}{categorization previous to 2021}
+#'     \item{appendixCITES}{Apendix form CITES}
+#'     \item{source}{reference for distribution}
+#'     \item{occurrenceRemarks}{region were the species occurs.}
+#'   }
+#'
+#'
+#' @details This dataset is designed to provide users with comprehensive
+#' information about the mammal species found in Colombia, as documented
+#' by Ramírez-Chaves 2021. It is organized for easy access and utilization within
+#' the R environment.
+#'
+#' @seealso
+#' For more information about the "mammalcol" package and the data sources, visit
+#' the package's GitHub repository: \url{https://github.com/dlizcano/mammalcol}
+#'
+#' @references
+#' The dataset is based on the "List of the Mammals of Colombia" by Ramírez-Chaves 2021.
+#'
+#' @author
+#' Data compilation: Ramírez-Chaves 2021, Package implementation: Diego J. Lizcano
+#'
+#' @keywords dataset
+"distribution"
diff --git a/R/get_mammalcol.R b/R/get_mammalcol.R
@@ -38,7 +38,7 @@ search_mammalcol <- function(splist, max_distance = 0.2) {
   splist_std <- unique(splist_st)
 
   # create an output data container
-  output_matrix <- matrix(nrow = length(splist_std), ncol = 20) # two more
+  output_matrix <- matrix(nrow = length(splist_std), ncol = 21) # two more
   colnames(output_matrix) <- c("name_submitted",
                                names(taxon), 
                                "Distance")
@@ -61,15 +61,15 @@ search_mammalcol <- function(splist, max_distance = 0.2) {
 
     # check non matching result
     if (length(matches) == 0) {
-      row_data <- rep("nill", 18) # number of columns
+      row_data <- rep("nill", 19) # number of columns
     }
     else if (length(matches) != 0){ # match result
       dis_value <- as.numeric(utils::adist(splist_std[i], matches))
       matches1 <- matches[dis_value <= max_distance_fixed]
       dis_val_1 <- dis_value[dis_value <= max_distance_fixed]
 
       if (length(matches1) == 0){
-        row_data <- rep("nill", 18) # number of columns
+        row_data <- rep("nill", 19) # number of columns
       }
       else if (length(matches1) != 0){
         row_data <- as.matrix(taxon[taxon$scientificName %in% matches1,])

diff --git a/R/mammalcol24.R b/R/mammalcol24.R
@@ -2,7 +2,7 @@
 #'
 #' The taxon dataset comprises a tibble containing information
 #' regarding mammal species documented in Colombia, sourced from the "List of the
-#' mammals of Peru" compiled by Ramírez-Chaves 2021. It encompasses various
+#' mammals of Colombia" compiled by Ramírez-Chaves 2021. It encompasses various
 #' details, including scientific names, English names, Spanish names, order,
 #'  family, threat status and distribution of each species. The table is a 
 #'  variant of the taxon table from https://doi.org/10.15472/kl1whs

diff --git a/R/mammalmap.R b/R/mammalmap.R
@@ -18,8 +18,8 @@
 #' species <- "Tapirus pinchaque" 
 #' mammalmap(species)
 #'
-#' @importFrom rlang taxon
-#' @importFrom rlang colmap
+#' @importFrom mammalcol taxon
+#' @importFrom mammalcol colmap
 #' @export
 mammalmap <- function(species){
 
@@ -28,6 +28,10 @@ mammalmap <- function(species){
   if (!requireNamespace("sf", quietly = TRUE))
     install.packages("sf")
 
+  if (!is.character(species)) {
+    stop(paste0("Argument species must be a character, not ", class(Species)))
+  }
+
   # require("ggplot2")
   # require("sf")
 

diff --git a/R/sp_by_depto.R b/R/sp_by_depto.R
@@ -0,0 +1,54 @@
+#' Mammal occurrence by departamento
+#' 
+#' Find the mammal species that occur in a given departamento of Colombia
+#' 
+#' @param states a character vector with one or more departamento names
+#' @param type type of matching to be used. \code{any} will return the mammal species that
+#'   occur in any of the passed \code{states}. \code{only} matches mammal species that
+#'   occur only in all provided (no more, no less) \code{states} and \code{all} matches 
+#'   mammal species that occur at least in all \code{states} passed. See examples.
+#' @param taxa optional character vector to match against the departamentos. Use the order name
+#' @export
+#' @return a data frame
+#' @examples
+#' \dontrun{
+#' occ.any <- sp_by_depto(c("Arauca", "Norte de Santander"), type = "any")
+#' occ.only <- sp_by_depto(c("Norte de Santander"), type = "only")
+#' occ.all <- sp_by_depto(c("Arauca", "Norte de Santander"), type = "all")
+#' occ.taxa <- sp_by_depto(c("Arauca", "Norte de Santander"), type = "all", taxa = "Chiroptera")
+#' 
+#' head(occ.any)
+#' head(occ.only)
+#' head(occ.all)
+#' head(occ.taxa)
+#' }
+#' 
+sp_by_depto <- function(states, type = c("any", "only", "all"), taxa = NULL) {
+  if (length(states) == 0) stop("Please provide at least one Colombian Departamento")
+  type <- match.arg(type)
+  states <- sort(states)
+  # states <- paste("BR-", states, sep = "")
+  if (length(states) == 0) stop("Please provide at least one Colombian Departamento")
+  # res <- lapply(occurrences, match, states)
+  if (type == "any") {
+    #res <- lapply(res, function(x) any(!is.na(x)))
+    res <- subset(distribution, grepl(paste(states, collapse = "|"), locality))
+  }
+  if (type == "only") {
+    res <- subset(distribution, grepl(paste("^", paste(states, collapse = "\\|"), "$", sep = ""), locality))
+  }
+  if (type == "all") {
+    res <- subset(distribution, grepl(paste(states, collapse = ".*"), locality))
+  }
+  # res <- distribution[unlist(res), ]
+  if (nrow(res) == 0) {
+    return(NA)
+  }
+  if (is.null(taxa)) {
+    merge(taxon[, c("scientificName", "family", "order",  "id")], res[, c("id", "locality")], by = "id")[,-1]
+    # removes id
+  } else {
+    merge(taxon[taxon$order %in% taxa, c("scientificName", "family", "order",  "id" )], res[, c("id", "locality")], by = "id")[,-1]
+    # removes id
+  }
+}
diff --git a/README.Rmd b/README.Rmd
@@ -33,7 +33,7 @@ search_mammalcol <- function(splist, max_distance = 0.2) {
   splist_std <- unique(splist_st)
 
   # create an output data container
-  output_matrix <- matrix(nrow = length(splist_std), ncol = 20) # two more
+  output_matrix <- matrix(nrow = length(splist_std), ncol = 21) # two more
   colnames(output_matrix) <- c("name_submitted",
                                names(taxon), 
                                "Distance")
@@ -56,15 +56,15 @@ search_mammalcol <- function(splist, max_distance = 0.2) {
 
     # check non matching result
     if (length(matches) == 0) {
-      row_data <- rep("nill", 18) # number of columns
+      row_data <- rep("nill", 19) # number of columns
     }
     else if (length(matches) != 0){ # match result
       dis_value <- as.numeric(utils::adist(splist_std[i], matches))
       matches1 <- matches[dis_value <= max_distance_fixed]
       dis_val_1 <- dis_value[dis_value <= max_distance_fixed]
 
       if (length(matches1) == 0){
-        row_data <- rep("nill", 18) # number of columns
+        row_data <- rep("nill", 19) # number of columns
       }
       else if (length(matches1) != 0){
         row_data <- as.matrix(taxon[taxon$scientificName %in% matches1,])
@@ -223,7 +223,7 @@ search_mammalcol(splist)
 
 ```
 
-#### Search database whith typos
+#### Search database with typos
 
 `mammalcol` has the ability to identify and correct minor typos and lower case in the genus. Correcting potential typos or variations in species names through fuzzy matching is a crucial aspect of data management. This technique ensures precise retrieval by adeptly identifying and accommodating minor differences in input names, thus enhancing the reliability of analyses conducted on diverse, inconsistent, and ensemble datasets. 
 

diff --git a/README.md b/README.md
@@ -82,7 +82,7 @@ search_mammalcol(splist)
 #> 4          Northern Pudu       17
 ```
 
-#### Search database whith typos
+#### Search database with typos
 
 `mammalcol` has the ability to identify and correct minor typos and
 lower case in the genus. Correcting potential typos or variations in

diff --git a/docs/index.html b/docs/index.html
diff --git a/docs/pkgdown.yml b/docs/pkgdown.yml
@@ -2,7 +2,7 @@ pandoc: 3.1.1
 pkgdown: 2.0.7
 pkgdown_sha: ~
 articles: {}
-last_built: 2024-03-26T05:55Z
+last_built: 2024-03-27T06:37Z
 urls:
   reference: https://dlizcano.github.io/mammalcol/reference
   article: https://dlizcano.github.io/mammalcol/articles

diff --git a/docs/reference/distribution.html b/docs/reference/distribution.html