Adding BFO data (version 0.0.2)

LimaRAF · Jul 15, 2024 · 24f5eaa · 24f5eaa
1 parent 3e7dd5f
commit 24f5eaa
Show file tree

Hide file tree

Showing 15 changed files with 275 additions and 56 deletions.
diff --git a/.Rbuildignore b/.Rbuildignore
@@ -11,3 +11,4 @@
 ^CITATION\.cff$
 ^make\.R$
 ^data-raw$
+^data$
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: plantRdata
 Type: Package
 Title: Accessory Datasets for Package plantR
-Version: 0.0.1
+Version: 0.0.2
 Authors@R: c(
     person(given   = "Renato A.",
            family  = "Ferreira de Lima",

diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,9 @@
+# plantRdata 0.0.2
+
+* Addition of another taxonomic backbone: BFO
+
+* Updating WCVP backbone
+
 # plantRdata 0.0.1
 
 * Creation of the package and adding the `NEWS.md` file to track changes.

diff --git a/R/bfoNames.R b/R/bfoNames.R
@@ -1,30 +1,30 @@
-#' @title Brazilian Plant Taxonomy
+#' @title Brazilian Vascular Plant Taxonomy
 #'
 #' @description A dataset containing the most relevant taxonomic
-#'   information of the kingdom Plantae stored in the 
-#'   [Brazilian Flora 2020](https://floradobrasil.jbrj.gov.br/consulta/)
+#'   information of the vascular plants (i.e. Tracheophyta) stored in
+#'   the [Brazilian Flora 2020](https://floradobrasil.jbrj.gov.br/consulta/) 
 #'   taxonomic backbone, a.k.a. the Flora and Funga of Brazil. It
 #'   includes all taxonomic levels (i.e. infra-species, species,
 #'   genus, family and so on).\cr\cr The original backbone was
 #'   slightly edited aiming to standardize the notation across all
-#'   backbones in `plantRdata`. Many columns in the original
-#'   source that were not essential for the process of taxon name
-#'   checking were removed and most of them were renamed. Duplicated
-#'   scientific names (i.e. taxon name + author) whose taxon status
-#'   were not accepted were also removed to avoid possible problems in
-#'   the taxon name matching process.
+#'   backbones in `plantRdata`. Many columns in the original source
+#'   that were not essential for the process of taxon name checking
+#'   were removed and most of them were renamed. Duplicated scientific
+#'   names (i.e. taxon name + author) whose taxon status was not
+#'   accepted were also removed to avoid possible problems in the
+#'   taxon name matching process.
 #'
 #' @keywords datasets
-#' @name bfoNamesPlantae
-#' @usage data(bfoNamesPlantae)
+#' @name bfoNamesTracheophyta
+#' @usage data(bfoNamesTracheophyta)
 #' @source \url{https://ipt.jbrj.gov.br/jbrj/resource?r=lista_especies_flora_brasil}
 #' @evalRd .readScript("data-raw/bfo/last_update.txt", 
-#'  "Last update/change of the downloaded backbone (year-month-day):")
+#'  "Last update/change of the downloaded backbone (day month year):")
 #' @format An object of class \code{data.frame} with 14 columns and
-#'   over 140 thousand rows.
+#'   over 125 thousand rows.
 #' @evalRd .readScript("data-raw/bfo/citation.txt", "", "references")
 #'  
-"bfoNamesPlantae"
+"bfoNamesTracheophyta"
 
 #' @title Brazilian Fungi Taxonomy
 #'
@@ -47,9 +47,63 @@
 #' @usage data(bfoNamesFungi)
 #' @source \url{https://ipt.jbrj.gov.br/jbrj/resource?r=lista_especies_flora_brasil}
 #' @evalRd .readScript("data-raw/bfo/last_update.txt", 
-#'  "Last update/change of the downloaded backbone (year-month-day):")
+#'  "Last update/change of the downloaded backbone (day month year):")
 #' @format An object of class \code{data.frame} with 14 columns and
-#'   over 11 thousand rows.
+#'   almost 12 thousand rows.
 #' @evalRd .readScript("data-raw/bfo/citation.txt", "", "references")
 #'  
 "bfoNamesFungi"
+
+#' @title Brazilian Algae Taxonomy
+#'
+#' @description A dataset containing the most relevant taxonomic
+#'   information of Algae stored in
+#'   the [Brazilian Flora 2020](https://floradobrasil.jbrj.gov.br/consulta/) 
+#'   taxonomic backbone, a.k.a. the Flora and Funga of Brazil. It
+#'   includes all taxonomic levels (i.e. infra-species, species,
+#'   genus, family and so on).\cr\cr The original backbone was
+#'   slightly edited aiming to standardize the notation across all
+#'   backbones in `plantRdata`. Many columns in the original source
+#'   that were not essential for the process of taxon name checking
+#'   were removed and most of them were renamed. Duplicated scientific
+#'   names (i.e. taxon name + author) whose taxon status was not
+#'   accepted were also removed to avoid possible problems in the
+#'   taxon name matching process.
+#'
+#' @keywords datasets
+#' @name bfoNamesAlgae
+#' @usage data(bfoNamesAlgae)
+#' @source \url{https://ipt.jbrj.gov.br/jbrj/resource?r=lista_especies_flora_brasil}
+#' @evalRd .readScript("data-raw/bfo/last_update.txt", 
+#'  "Last update/change of the downloaded backbone (day month year):")
+#' @format An object of class \code{data.frame} with 14 columns and
+#'   about one thousand rows.
+#' @evalRd .readScript("data-raw/bfo/citation.txt", "", "references")
+"bfoNamesAlgae"
+
+#' @title Brazilian Bryophyte Taxonomy
+#'
+#' @description A dataset containing the most relevant taxonomic
+#'   information of Bryophyta stored in
+#'   the [Brazilian Flora 2020](https://floradobrasil.jbrj.gov.br/consulta/) 
+#'   taxonomic backbone, a.k.a. the Flora and Funga of Brazil. It
+#'   includes all taxonomic levels (i.e. infra-species, species,
+#'   genus, family and so on).\cr\cr The original backbone was
+#'   slightly edited aiming to standardize the notation across all
+#'   backbones in `plantRdata`. Many columns in the original source
+#'   that were not essential for the process of taxon name checking
+#'   were removed and most of them were renamed. Duplicated scientific
+#'   names (i.e. taxon name + author) whose taxon status was not
+#'   accepted were also removed to avoid possible problems in the
+#'   taxon name matching process.
+#'
+#' @keywords datasets
+#' @name bfoNamesBryophyta
+#' @usage data(bfoNamesBryophyta)
+#' @source \url{https://ipt.jbrj.gov.br/jbrj/resource?r=lista_especies_flora_brasil}
+#' @evalRd .readScript("data-raw/bfo/last_update.txt", 
+#'  "Last update/change of the downloaded backbone (day month year):")
+#' @format An object of class \code{data.frame} with 14 columns and
+#'   about 5 thousand rows.
+#' @evalRd .readScript("data-raw/bfo/citation.txt", "", "references")
+"bfoNamesBryophyta"
diff --git a/data-raw/get_bfo.R b/data-raw/get_bfo.R
@@ -108,13 +108,13 @@ if (last_updated != last_download) {
 
   # Editing data --------------------------------------------------
   ## filtering and standardizing important column names
-  cols <- c("taxonID", "phylum", "family", 
+  cols <- c("taxonID", "higherClassification" ,"phylum", "family", 
             "taxon_name", "scientificNameAuthorship",
             "taxonRank", "nomenclaturalStatus", "taxonomicStatus", 
             "acceptedNameUsageID", "kingdom", "scientificName") 
 
   data <- as.data.frame(data)[, cols]
-  names(data) <- c("id", "phylum", "family", 
+  names(data) <- c("id", "higherClassification", "phylum", "family", 
                    "name", "authorship", 
                    "taxon.rank", "name.status", "taxon.status", 
                    "accepted.id", "kingdom", "scientific.name")
@@ -141,6 +141,7 @@ if (last_updated != last_download) {
 
   ## Organizing fields
   cols1 <- c("id",
+             "higherClassification", 
              "kingdom",
              "phylum",
              "family", # "genus", "specific.epiteth", "infra.epiteth",
@@ -165,14 +166,34 @@ if (last_updated != last_download) {
   data$accepted.taxon.status <- tolower(data$accepted.taxon.status)
   data$accepted.name.status <- tolower(data$accepted.name.status)
 
-
+  ## Editing the higher classification (keeping only its first level)
+  data$higherClassification <- gsub("Flora e Funga;", "", data$higherClassification)
+  data$higherClassification <- gsub(";.*", "", data$higherClassification)
+  rep_these <- data$phylum %in% "Ascomycota" & 
+                data$higherClassification %in% "Angiospermas"
+  if (any(rep_these))
+    data$phylum[rep_these] <- "Tracheophyta" 
+
+  rep_these <- data$phylum %in% "Tracheophyta"
+  data$higherClassification[rep_these] <- "Tracheophyta"   
+  data$higherClassification[data$higherClassification %in% "Algas"] <- 
+    "Algae"
+  data$higherClassification[data$higherClassification %in% "Briófitas"] <- 
+    "Bryophyta"
+  data$higherClassification[data$higherClassification %in% "Fungos"] <- 
+    "Fungi"
+  table(data$higherClassification)
+
+
   # Saving ------------------------------------------------------------
-  reinos <- c("Plantae", "Fungi")
+  # reinos <- c("Plantae", "Fungi")
+  classes <- c("Tracheophyta", "Algae", "Bryophyta", "Fungi")
 
   ## Cleaning and re-ordering
-  data <- data[!data$name %in% c("", NA, " ", "NA", reinos), ]
+  data <- data[!data$name %in% c("", NA, " ", "NA"), ]
   data <- data[order(data$taxon.status), ]
   data <- data[!duplicated(paste0(data$kingdom, data$scientific.name)), ]
+  data <- data[!duplicated(paste0(data$higherClassification, data$scientific.name)), ]
   data <- data[order(data$id), ]
 
   ## Removing the combined name + authorship column
@@ -182,31 +203,53 @@ if (last_updated != last_download) {
   data$id <- paste0(backbone, "-", data$id)
 
   ## How many columns and lines (in May 2024: 153,089)
-  dimensions <- paste0(dim(data)[1], " rows and ", dim(data)[2], " columns")
+  # dimensions <- paste0(dim(data)[1], " rows and ", dim(data)[2], " columns")
 
   ## Saving
-  data_split <- split(data, data$kingdom)
-  data_split <- data_split[names(data_split) %in% reinos]
-  for (i in seq_along(reinos)) {
-    dimensions <- paste0(dim(data_split[[reinos[i]]])[1], 
-                         " rows and ", 
-                         dim(data_split[[reinos[i]]])[2], 
-                         " columns")
-    path_to_save <- file.path(here::here(), "data-raw", backbone, 
-                              paste0("df_dim_",reinos[i],".txt"))
-    write(dimensions, path_to_save)
+  data_split <- split(data, data$higherClassification)
+  data_split <- data_split[names(data_split) %in% classes]
+  for (i in seq_along(classes)) {
+
+    if (classes[i] == "Tracheophyta") {
+      bfoNamesTracheophyta <- data_split[[classes[i]]]
+      bfoNamesTracheophyta$higherClassification <- NULL
+
+      usethis::use_data(bfoNamesTracheophyta, compress = "xz", 
+                        overwrite = TRUE)
+    }
 
-    if (reinos[i] == "Plantae") {
-      bfoNamesPlantae <- data_split[[reinos[i]]]
-      usethis::use_data(bfoNamesPlantae, compress = "xz", 
+    if (classes[i] == "Algae") {
+      bfoNamesAlgae <- data_split[[classes[i]]]
+      bfoNamesAlgae$higherClassification <- NULL
+
+      usethis::use_data(bfoNamesAlgae, compress = "xz", 
                         overwrite = TRUE)
     }
 
-    if (reinos[i] == "Fungi") {
-      bfoNamesFungi <- data_split[[reinos[i]]]
+    if (classes[i] == "Bryophyta") {
+      bfoNamesBryophyta <- data_split[[classes[i]]]
+      bfoNamesBryophyta$higherClassification <- NULL
+
+      usethis::use_data(bfoNamesBryophyta, compress = "xz", 
+                        overwrite = TRUE)
+    }
+
+    if (classes[i] == "Fungi") {
+      bfoNamesFungi <- data_split[[classes[i]]]
+      bfoNamesFungi$higherClassification <- NULL
+
       usethis::use_data(bfoNamesFungi, compress = "xz", 
                         overwrite = TRUE)
     }
+
+    dimensions <- paste0(dim(data_split[[classes[i]]])[1], 
+                         " rows and ", 
+                         dim(data_split[[classes[i]]])[2], 
+                         " columns")
+    path_to_save <- file.path(here::here(), "data-raw", backbone, 
+                              paste0("df_dim_", classes[i],".txt"))
+    write(dimensions, path_to_save)
+
   }
   path_to_save <- file.path(here::here(), "data-raw", backbone, 
                             "last_update.txt")

diff --git a/data-raw/make.R b/data-raw/make.R
@@ -0,0 +1,43 @@
+#' Run plantRdata Data Creation Steps
+#' 
+#' @description 
+#' Installs dependencies, loads the package, sources the scripts that
+#' build the WFO, WCVP, GBIF and BFO backbones and rebuilds the manuals.
+#' 
+#' @author Renato A. Ferreira de Lima \email{[email protected]}
+#' 
+#' @date 2024/04/26
+## Install Dependencies (listed in DESCRIPTION) ------------------
+devtools::install_deps(upgrade = "never")
+
+## Load Project Addins (R Functions and Packages) ----------------
+devtools::load_all(here::here())
+
+## Global Variables ----------------------------------------------
+# You can list global variables here (or in a separate R script)
+
+## Run Project ---------------------------------------------------
+# Creating the WFO taxonomic backbone
+source(here::here("data-raw", "get_wfo.R"))
+# Creating the WCVP taxonomic backbone
+source(here::here("data-raw", "get_wcvp.R"))
+# Creating the GBIF taxonomic backbone
+source(here::here("data-raw", "get_gbif.R"))
+# Creating the BFO taxonomic backbone
+source(here::here("data-raw", "get_bfo.R"))
+# Creating the LCVP taxonomic backbone
+# source(here::here("data-raw", "get_lcvp.R"))
+# Creating the GADM polygons
+# source(here::here("data-raw", "get_gadm.R"))
+# Building the map at global scale
+# source(here::here("data-raw", "get_world_map.R"))
+# Building the map for Latin America (Central and South America + the Caribbean)
+# source(here::here("data-raw", "get_latam_map.R"))
+
+## Build/Updating Package Manuals ---------------------------------
+devtools::document(here::here())
+
+# block <- roxygen2::parse_file("./R/wfoNames.R")[[1]]
+# block <- roxygen2::parse_file("./R/wfoNames.R")
+# roxygen2::roclet(block)
+
diff --git a/data/bfoNamesAlgae.rda b/data/bfoNamesAlgae.rda
diff --git a/data/bfoNamesBryophyta.rda b/data/bfoNamesBryophyta.rda
diff --git a/data/bfoNamesFungi.rda b/data/bfoNamesFungi.rda
diff --git a/data/bfoNamesPlantae.rda b/data/bfoNamesPlantae.rda
diff --git a/data/bfoNamesTracheophyta.rda b/data/bfoNamesTracheophyta.rda
diff --git a/man/bfoNamesPlantae.Rd → man/bfoNamesAlgae.Rd b/man/bfoNamesPlantae.Rd → man/bfoNamesAlgae.Rd