From c3c2807e18324519d226fa29f7453fcd9ea71a90 Mon Sep 17 00:00:00 2001
From: Irene Foster <ifoster@nps.gov>
Date: Mon, 5 Feb 2024 10:44:39 -0800
Subject: [PATCH 01/11] adding utils

---
 R/utils.R | 264 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 264 insertions(+)
 create mode 100644 R/utils.R

diff --git a/R/utils.R b/R/utils.R
new file mode 100644
index 0000000..ba6d325
--- /dev/null
+++ b/R/utils.R
@@ -0,0 +1,264 @@
+#' @importFrom magrittr %>% %<>%
+
+pkg_globals <- new.env(parent = emptyenv())
+
+#' Get AGOL authentication token
+#'
+#' Treat this token as you would a password: don't hard-code it in your scripts or save it to a file. It will expire after 60 minutes.
+#'
+#' @param agol_username AGOL headless account username
+#' @param agol_password AGOL headless account password (do not hard code this into your scripts!)
+#' @param root NPS users should keep the default. See <https://developers.arcgis.com/rest/users-groups-and-items/root.htm> for more information.
+#' @param referer NPS users should keep the default. See <https://developers.arcgis.com/rest/users-groups-and-items/generate-token.htm> for more information.
+#'
+#' @return An AGOL authentication token
+#' @export
+#'
+fetchAGOLToken <- function(agol_username, agol_password = keyring::key_get(service = "AGOL", username = agol_username), root = "nps.maps.arcgis.com", referer = "https://irma.nps.gov") {
+
+  url <- paste0("https://", root, "/sharing/rest/generateToken")
+
+  # Get a token with a headless account
+  token_resp <- httr::POST(url,
+                           body = list(username = agol_username,
+                                       password = agol_password,
+                                       expiration = 60,
+                                       referer = referer,
+                                       f = 'json'),
+                           encode = "form")
+  agol_token <- jsonlite::fromJSON(httr::content(token_resp, type="text", encoding = "UTF-8"))
+
+  return(agol_token)
+}
+
+#' Fetch tabular data from AGOL
+#'
+#' Retrieves tabular data from AGOL layers and tables, even when number of rows exceeds maximum record count.
+#'
+#' @param data_path Feature service URL
+#' @param layer_number Layer number
+#' @param token Authentication token (not needed for public layers)
+#' @param geometry Include spatial data columns? Works with points, not tested with other geometry types
+#' @param where Query clause specifying a subset of rows (optional; defaults to all rows). See AGOL REST API documentation.
+#' @param outFields String indicating which fields to return (optional; defaults to all fields). See AGOL REST API documentation.
+#'
+#' @return A tibble
+#' @export
+#'
+fetchAllRecords <- function(data_path, layer_number, token, geometry = FALSE, where = "1=1", outFields = "*") {
+  result <- tibble::tibble()
+  exc_transfer <- TRUE
+  offset <- nrow(result)
+
+  qry <- list(where = where,
+              outFields = outFields,
+              f = "JSON",
+              resultOffset = offset)
+
+  if (!missing(token)) {
+    qry$token <- token$token
+  }
+
+  while(exc_transfer) {
+    resp <- httr::GET(paste0(data_path, "/", layer_number, "/query"),
+                      query = qry)
+
+    content <- jsonlite::fromJSON(httr::content(resp, type = "text", encoding = "UTF-8"))
+
+    if ("error" %in% names(content)) {
+      message <- glue::glue("Error code {content$error$code}: {content$error$message}")
+      if ((content$error$message != content$error$details) && (content$error$details != '')) {
+        message <- c(message, glue::glue("Details: {content$error$details}"))
+      }
+      names(message) <- rep("x", length(message))
+      cli::cli_abort(message)
+    }
+
+    if ("exceededTransferLimit" %in% names(content)) {
+      exc_transfer <- content$exceededTransferLimit
+    } else {
+      exc_transfer <- FALSE
+    }
+
+    if (geometry) {
+      partial_result <- cbind(content$features$attributes, content$features$geometry) %>%
+        dplyr::mutate(wkid = content$spatialReference$wkid) %>%
+        tibble::as_tibble()
+    } else {
+      partial_result <- tibble::as_tibble(content$features$attributes)
+    }
+    result <- rbind(result, partial_result)
+    offset <- nrow(result)
+    qry$resultOffset <- offset
+  }
+  return(result)
+}
+
+#' Fetch metadata from AGOL
+#'
+#' Retrieves metadata from AGOL layers and tables.
+#'
+#' @param url Feature service URL
+#' @param layer_number Optional layer ID
+#' @param token Authentication token (not needed for public layers)
+#'
+#' @return A list
+#' @export
+#'
+fetchMetadata <- function(url, token, layer_number) {
+
+  if (!missing(layer_number)) {
+    url <- paste0(url, "/", layer_number, "/metadata")
+  } else {
+    url <- paste0(url, "/info/metadata")
+  }
+
+  # Get metadata
+  if (!missing(token)) {
+    resp <- httr::GET(url,
+                      query = list(token = token$token,
+                                   format = "fgdc",
+                                   f = "xml"))
+  } else {
+    resp <- httr::GET(url)
+  }
+  content <- httr::content(resp, type = "text/xml", encoding = "UTF-8")
+  metadata <- xml2::as_list(content)
+  metadata <- wrangleLayerMetadata(metadata$metadata, token)
+
+  return(metadata)
+}
+
+wrangleMetadata <- function(raw_meta) {
+  meta <- lapply(raw_meta$eainfo, function(entity) {
+    table_name <- entity$detailed$enttyp$enttypl[[1]]
+    item_meta <- list(table_name = list(table_description = entity$detailed$enttyp$enttypd[[1]]))
+  })
+}
+
+wrangleLayerMetadata <- function(raw_meta, token) {
+  # Field level metadata
+  fields <- lapply(raw_meta$eainfo$detailed[2:length(raw_meta$eainfo$detailed)], function(field) {
+    field_name <- field$attrlabl[[1]]
+    desc <- parseAttrDef(field$attrdef[[1]])
+    try({
+      lookup_name <- trimws(field$attrdomv$codesetd$codesetn[[1]])
+      lookup_url <- trimws(field$attrdomv$codesetd$codesets[[1]])
+      lookup_df <- fetchHostedCSV(stringr::str_remove(lookup_url, "^.*?id="), token)
+      desc$lookup <- list(lookup_name = lookup_name,
+                          lookup_url = lookup_url,
+                          lookup_df = lookup_df)
+    }, silent = TRUE)
+    item_meta <- list()
+    item_meta[[field_name]] <- desc
+    return(item_meta)
+  })
+
+  # simplify list
+  fields <- purrr::flatten(fields)
+
+  # Table level metadata
+  table_name <- trimws(raw_meta$eainfo$detailed[1]$enttyp$enttypl[[1]])
+  table_desc <- trimws(raw_meta$eainfo$detailed[1]$enttyp$enttypd[[1]])
+
+  meta <- list(table_name = table_name,
+               table_description = table_desc,
+               fields = fields)
+
+  return(meta)
+}
+
+parseAttrDef <- function(def) {
+  attrs <- list()
+  if (!is.null(def)) {
+    description <- trimws(stringr::str_remove_all(def, "\\{.*\\}"))
+  } else {
+    description <- NA
+  }
+
+  if (any(grepl("\\{.*\\}", def))) {
+    starts <- stringr::str_locate_all(def, "\\{")[[1]][, 1]
+    ends <- stringr::str_locate_all(def, "\\}")[[1]][, 1]
+
+    for (i in 1:length(starts)) {
+      start <- starts[i] + 1
+      end <- ends[i] - 1
+      name_value <- trimws(strsplit(substr(def, start, end), ":")[[1]])
+      attrs[[name_value[1]]] <- name_value[2]
+    }
+  }
+  def_list <- list(description = description,
+                   attributes = attrs)
+
+  return(def_list)
+}
+
+#' Fetch feature service info from AGOL
+#'
+#' Retrieves metadata from AGOL layers and tables.
+#'
+#' @param url Feature service URL
+#' @param token Authentication token (not needed for public layers)
+#'
+#' @return A list
+#' @export
+#'
+fetchLayerAndTableList <- function(url, token) {
+
+  qry <- list(f = "json")
+
+  # Get feature service info
+  if (!missing(token)) {
+    qry$token <- token$token
+  }
+
+  resp <- httr::GET(url,
+                    query = qry)
+
+  content <- httr::content(resp, type = "text/json", encoding = "UTF-8")
+  feature_service <- jsonlite::fromJSON(content)
+
+  # TODO: there are no aspen feature layers in the aspen database, but there is one separate feature layer that we will need to import later
+
+  # Get layer id's and names
+  if (hasName(feature_service, "layers") & length(feature_service$layers) > 0) {
+    layers <- dplyr::select(feature_service$layers, id, name)
+  } else {
+    layers <- tibble::tibble(.rows = 0)
+  }
+
+  # Get table id's and names
+  if (hasName(feature_service, "tables") & length(feature_service$tables) > 0) {
+    tables <- dplyr::select(feature_service$tables, id, name)
+  } else {
+    tables <- tibble::tibble(.rows = 0)
+  }
+
+  layers_tables <- rbind(layers, tables)
+  # layers_tables <- layers
+
+  return(layers_tables)
+}
+
+fetchRawAspen <- function(aspen_database_url = "https://services1.arcgis.com/fBc8EJBxQRMcHlei/arcgis/rest/services/MOJN_Aspen_Test_Visit_NonSpatial_gdb/FeatureServer", agol_username = "mojn_data", agol_password = keyring::key_get(service = "AGOL", username = agol_username)) {
+  token <- fetchAGOLToken(agol_username, agol_password)
+  layers_tables <- fetchLayerAndTableList(aspen_database_url, token)
+  ids <- layers_tables$id
+  names(ids) <- layers_tables$name
+
+  metadata <- sapply(ids, function(id) {
+    meta <- fetchMetadata(aspen_database_url, token, id)
+    meta[["table_id"]] <- id
+    return(meta)
+  }, simplify = FALSE, USE.NAMES = TRUE)
+
+  data <- sapply(metadata, function(meta){
+    data_table <- fetchAllRecords(aspen_database_url, meta$table_id, token, outFields = paste(names(meta$fields), collapse = ",")) %>%
+      dplyr::select(dplyr::any_of(names(meta$fields)))
+    return(data_table)
+  }, simplify = FALSE, USE.NAMES = TRUE)
+
+  raw_data <- list(data = data,
+                   metadata = metadata)
+  return(raw_data)
+}

From c0e557da10b1e2895b6fdfe9e135f65b41a156e1 Mon Sep 17 00:00:00 2001
From: Irene Foster <ifoster@nps.gov>
Date: Mon, 5 Feb 2024 13:45:00 -0800
Subject: [PATCH 02/11] adding package documentation

---
 .Rbuildignore |  1 +
 .gitignore    |  3 +++
 DESCRIPTION   | 14 ++++++++------
 LICENSE.md    | 43 +++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 55 insertions(+), 6 deletions(-)
 create mode 100644 LICENSE.md

diff --git a/.Rbuildignore b/.Rbuildignore
index 0fb64fa..005a054 100644
--- a/.Rbuildignore
+++ b/.Rbuildignore
@@ -1,2 +1,3 @@
 ^aspen\.Rproj$
 ^\.Rproj\.user$
+^LICENSE\.md$
diff --git a/.gitignore b/.gitignore
index cd67eac..5b6a065 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,4 @@
 .Rproj.user
+.Rhistory
+.RData
+.Ruserdata
diff --git a/DESCRIPTION b/DESCRIPTION
index 26f2de6..58de126 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,12 +1,14 @@
 Package: aspen
 Title: What the Package Does (One Line, Title Case)
 Version: 0.0.0.9000
-Authors@R: 
-    person("First", "Last", , "first.last@example.com", role = c("aut", "cre"),
-           comment = c(ORCID = "YOUR-ORCID-ID"))
-Description: What the package does (one paragraph).
-License: `use_mit_license()`, `use_gpl3_license()` or friends to pick a
-    license
+Authors@R: Aspen Protocol Data Tools
+    c(person("Sarah", "Wright", email = "sarah_wright@nps.gov", role=c("aut","cre")), 
+           person("Irene", "Foster", email = "irene_foster@partner.nps.gov", role=c("aut"), ,
+           comment = c(ORCID = "0000-0001-9681-4786")))
+Description: This R package is intended to provide automated QA/QC, visualization, and
+    basic analysis and statistical summaries of data collected for the Aspen
+    monitoring protocol.
+License: CC0
 Encoding: UTF-8
 Roxygen: list(markdown = TRUE)
 RoxygenNote: 7.3.1
diff --git a/LICENSE.md b/LICENSE.md
new file mode 100644
index 0000000..139c68e
--- /dev/null
+++ b/LICENSE.md
@@ -0,0 +1,43 @@
+## creative commons
+
+# CC0 1.0 Universal
+
+CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE LEGAL SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES REGARDING THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED HEREUNDER, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED HEREUNDER.
+
+### Statement of Purpose
+
+The laws of most jurisdictions throughout the world automatically confer exclusive Copyright and Related Rights (defined below) upon the creator and subsequent owner(s) (each and all, an "owner") of an original work of authorship and/or a database (each, a "Work").
+
+Certain owners wish to permanently relinquish those rights to a Work for the purpose of contributing to a commons of creative, cultural and scientific works ("Commons") that the public can reliably and without fear of later claims of infringement build upon, modify, incorporate in other works, reuse and redistribute as freely as possible in any form whatsoever and for any purposes, including without limitation commercial purposes. These owners may contribute to the Commons to promote the ideal of a free culture and the further production of creative, cultural and scientific works, or to gain reputation or greater distribution for their Work in part through the use and efforts of others.
+
+For these and/or other purposes and motivations, and without any expectation of additional consideration or compensation, the person associating CC0 with a Work (the "Affirmer"), to the extent that he or she is an owner of Copyright and Related Rights in the Work, voluntarily elects to apply CC0 to the Work and publicly distribute the Work under its terms, with knowledge of his or her Copyright and Related Rights in the Work and the meaning and intended legal effect of CC0 on those rights.
+
+1. __Copyright and Related Rights.__ A Work made available under CC0 may be protected by copyright and related or neighboring rights ("Copyright and Related Rights"). Copyright and Related Rights include, but are not limited to, the following:
+
+    i. the right to reproduce, adapt, distribute, perform, display, communicate, and translate a Work;
+
+    ii. moral rights retained by the original author(s) and/or performer(s);
+
+    iii. publicity and privacy rights pertaining to a person's image or likeness depicted in a Work;
+
+    iv. rights protecting against unfair competition in regards to a Work, subject to the limitations in paragraph 4(a), below;
+
+    v. rights protecting the extraction, dissemination, use and reuse of data in a Work;
+
+    vi. database rights (such as those arising under Directive 96/9/EC of the European Parliament and of the Council of 11 March 1996 on the legal protection of databases, and under any national implementation thereof, including any amended or successor version of such directive); and
+
+    vii. other similar, equivalent or corresponding rights throughout the world based on applicable law or treaty, and any national implementations thereof.
+
+2. __Waiver.__ To the greatest extent permitted by, but not in contravention of, applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and unconditionally waives, abandons, and surrenders all of Affirmer's Copyright and Related Rights and associated claims and causes of action, whether now known or unknown (including existing as well as future claims and causes of action), in the Work (i) in all territories worldwide, (ii) for the maximum duration provided by applicable law or treaty (including future time extensions), (iii) in any current or future medium and for any number of copies, and (iv) for any purpose whatsoever, including without limitation commercial, advertising or promotional purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each member of the public at large and to the detriment of Affirmer's heirs and successors, fully intending that such Waiver shall not be subject to revocation, rescission, cancellation, termination, or any other legal or equitable action to disrupt the quiet enjoyment of the Work by the public as contemplated by Affirmer's express Statement of Purpose.
+
+3. __Public License Fallback.__ Should any part of the Waiver for any reason be judged legally invalid or ineffective under applicable law, then the Waiver shall be preserved to the maximum extent permitted taking into account Affirmer's express Statement of Purpose. In addition, to the extent the Waiver is so judged Affirmer hereby grants to each affected person a royalty-free, non transferable, non sublicensable, non exclusive, irrevocable and unconditional license to exercise Affirmer's Copyright and Related Rights in the Work (i) in all territories worldwide, (ii) for the maximum duration provided by applicable law or treaty (including future time extensions), (iii) in any current or future medium and for any number of copies, and (iv) for any purpose whatsoever, including without limitation commercial, advertising or promotional purposes (the "License"). The License shall be deemed effective as of the date CC0 was applied by Affirmer to the Work. Should any part of the License for any reason be judged legally invalid or ineffective under applicable law, such partial invalidity or ineffectiveness shall not invalidate the remainder of the License, and in such case Affirmer hereby affirms that he or she will not (i) exercise any of his or her remaining Copyright and Related Rights in the Work or (ii) assert any associated claims and causes of action with respect to the Work, in either case contrary to Affirmer's express Statement of Purpose.
+
+4. __Limitations and Disclaimers.__
+
+    a. No trademark or patent rights held by Affirmer are waived, abandoned, surrendered, licensed or otherwise affected by this document.
+
+    b. Affirmer offers the Work as-is and makes no representations or warranties of any kind concerning the Work, express, implied, statutory or otherwise, including without limitation warranties of title, merchantability, fitness for a particular purpose, non infringement, or the absence of latent or other defects, accuracy, or the present or absence of errors, whether or not discoverable, all to the greatest extent permissible under applicable law.
+
+    c. Affirmer disclaims responsibility for clearing rights of other persons that may apply to the Work or any use thereof, including without limitation any person's Copyright and Related Rights in the Work. Further, Affirmer disclaims responsibility for obtaining any necessary consents, permissions or other rights required for any use of the Work.
+
+    d. Affirmer understands and acknowledges that Creative Commons is not a party to this document and has no duty or obligation with respect to this CC0 or use of the Work.

From be90091bb9716c655d58dca696de6baab7f64ac1 Mon Sep 17 00:00:00 2001
From: Wright <sewright@nps.gov>
Date: Tue, 6 Feb 2024 09:54:52 -0700
Subject: [PATCH 03/11] Set R project to generate roxygen documentation on
 clean & install

---
 mojn-aspen-rpackage.Rproj | 1 +
 1 file changed, 1 insertion(+)

diff --git a/mojn-aspen-rpackage.Rproj b/mojn-aspen-rpackage.Rproj
index 497f8bf..270314b 100644
--- a/mojn-aspen-rpackage.Rproj
+++ b/mojn-aspen-rpackage.Rproj
@@ -18,3 +18,4 @@ StripTrailingWhitespace: Yes
 BuildType: Package
 PackageUseDevtools: Yes
 PackageInstallArgs: --no-multiarch --with-keep.source
+PackageRoxygenize: rd,collate,namespace

From d463a9b5a13d25637be9f527263d0816d65553ab Mon Sep 17 00:00:00 2001
From: Wright <sewright@nps.gov>
Date: Tue, 6 Feb 2024 09:55:08 -0700
Subject: [PATCH 04/11] Update description

---
 DESCRIPTION | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 58de126..39ce343 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,8 +1,7 @@
 Package: aspen
-Title: What the Package Does (One Line, Title Case)
+Title: Aspen Protocol Data Tools
 Version: 0.0.0.9000
-Authors@R: Aspen Protocol Data Tools
-    c(person("Sarah", "Wright", email = "sarah_wright@nps.gov", role=c("aut","cre")), 
+Authors@R: c(person("Sarah", "Wright", email = "sarah_wright@nps.gov", role=c("aut","cre"), comment = c(ORCID = "0009-0004-5060-2189")), 
            person("Irene", "Foster", email = "irene_foster@partner.nps.gov", role=c("aut"), ,
            comment = c(ORCID = "0000-0001-9681-4786")))
 Description: This R package is intended to provide automated QA/QC, visualization, and

From 493bdbd33c90e9aa630c3dd2b68fe11def336c6c Mon Sep 17 00:00:00 2001
From: Irene Foster <ifoster@nps.gov>
Date: Thu, 8 Feb 2024 14:39:37 -0800
Subject: [PATCH 05/11] switch for using functions from fetchagol package

---
 DESCRIPTION                   |   3 +
 NAMESPACE                     |   7 +-
 R/utils.R                     | 315 +++++++++-------------------------
 man/fetchAGOLToken.Rd         |  28 ---
 man/fetchAllRecords.Rd        |  34 ----
 man/fetchAndWrangleAspen.Rd   |  25 +++
 man/fetchLayerAndTableList.Rd |  19 --
 man/fetchMetadata.Rd          |  21 ---
 8 files changed, 111 insertions(+), 341 deletions(-)
 delete mode 100644 man/fetchAGOLToken.Rd
 delete mode 100644 man/fetchAllRecords.Rd
 create mode 100644 man/fetchAndWrangleAspen.Rd
 delete mode 100644 man/fetchLayerAndTableList.Rd
 delete mode 100644 man/fetchMetadata.Rd

diff --git a/DESCRIPTION b/DESCRIPTION
index 39ce343..aa29ceb 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -11,3 +11,6 @@ License: CC0
 Encoding: UTF-8
 Roxygen: list(markdown = TRUE)
 RoxygenNote: 7.3.1
+Imports: 
+    dplyr,
+    fetchagol
diff --git a/NAMESPACE b/NAMESPACE
index 5dbc7f9..6e231fd 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -1,8 +1,7 @@
 # Generated by roxygen2: do not edit by hand
 
-export(fetchAGOLToken)
-export(fetchAllRecords)
-export(fetchLayerAndTableList)
-export(fetchMetadata)
+export(fetchAndWrangleAspen)
+import(dplyr)
+import(fetchagol)
 importFrom(magrittr,"%<>%")
 importFrom(magrittr,"%>%")
diff --git a/R/utils.R b/R/utils.R
index ba6d325..2684a2e 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -1,264 +1,109 @@
 #' @importFrom magrittr %>% %<>%
+#' @import dplyr
+#' @import fetchagol
 
 pkg_globals <- new.env(parent = emptyenv())
 
-#' Get AGOL authentication token
-#'
-#' Treat this token as you would a password: don't hard-code it in your scripts or save it to a file. It will expire after 60 minutes.
-#'
-#' @param agol_username AGOL headless account username
-#' @param agol_password AGOL headless account password (do not hard code this into your scripts!)
-#' @param root NPS users should keep the default. See <https://developers.arcgis.com/rest/users-groups-and-items/root.htm> for more information.
-#' @param referer NPS users should keep the default. See <https://developers.arcgis.com/rest/users-groups-and-items/generate-token.htm> for more information.
-#'
-#' @return An AGOL authentication token
-#' @export
-#'
-fetchAGOLToken <- function(agol_username, agol_password = keyring::key_get(service = "AGOL", username = agol_username), root = "nps.maps.arcgis.com", referer = "https://irma.nps.gov") {
-
-  url <- paste0("https://", root, "/sharing/rest/generateToken")
-
-  # Get a token with a headless account
-  token_resp <- httr::POST(url,
-                           body = list(username = agol_username,
-                                       password = agol_password,
-                                       expiration = 60,
-                                       referer = referer,
-                                       f = 'json'),
-                           encode = "form")
-  agol_token <- jsonlite::fromJSON(httr::content(token_resp, type="text", encoding = "UTF-8"))
 
-  return(agol_token)
-}
-
-#' Fetch tabular data from AGOL
-#'
-#' Retrieves tabular data from AGOL layers and tables, even when number of rows exceeds maximum record count.
+#' Fetch aspen data from AGOL and do preliminary data wrangling
 #'
-#' @param data_path Feature service URL
-#' @param layer_number Layer number
-#' @param token Authentication token (not needed for public layers)
-#' @param geometry Include spatial data columns? Works with points, not tested with other geometry types
-#' @param where Query clause specifying a subset of rows (optional; defaults to all rows). See AGOL REST API documentation.
-#' @param outFields String indicating which fields to return (optional; defaults to all fields). See AGOL REST API documentation.
+#' @param aspen_url URL to main AGOL aspen database
+#' @param site_url URL to AGOL database for aspen sites (if applicable)
+#' @param agol_username Authentication token (not needed for public layers)
 #'
-#' @return A tibble
+#' @return A list of data frames and metadata
 #' @export
-#'
-fetchAllRecords <- function(data_path, layer_number, token, geometry = FALSE, where = "1=1", outFields = "*") {
-  result <- tibble::tibble()
-  exc_transfer <- TRUE
-  offset <- nrow(result)
-
-  qry <- list(where = where,
-              outFields = outFields,
-              f = "JSON",
-              resultOffset = offset)
-
-  if (!missing(token)) {
-    qry$token <- token$token
-  }
 
-  while(exc_transfer) {
-    resp <- httr::GET(paste0(data_path, "/", layer_number, "/query"),
-                      query = qry)
 
-    content <- jsonlite::fromJSON(httr::content(resp, type = "text", encoding = "UTF-8"))
+# TODO make sure this works for everybody
+# TODO make mojn master site data optional
+# TODO should data be loaded from inside the function like now or input as a variable
+# TODO agol_password should maybe be added as a variable???
+# TODO can we remove creation, creationDate, editDate, and editor from all the tables??
+fetchAndWrangleAspen <- function(aspen_url = "https://services1.arcgis.com/fBc8EJBxQRMcHlei/arcgis/rest/services/MOJN_Aspen_Test_Visit_NonSpatial_gdb/FeatureServer",
+                                 site_url =  "https://services1.arcgis.com/fBc8EJBxQRMcHlei/arcgis/rest/services/AspenSites2/FeatureServer",
+                                 agol_username = "mojn_data") {
 
-    if ("error" %in% names(content)) {
-      message <- glue::glue("Error code {content$error$code}: {content$error$message}")
-      if ((content$error$message != content$error$details) && (content$error$details != '')) {
-        message <- c(message, glue::glue("Details: {content$error$details}"))
-      }
-      names(message) <- rep("x", length(message))
-      cli::cli_abort(message)
-    }
+  # Import aspen database and aspen site database
+  raw_data <- fetchagol::fetchRawData(aspen_url, agol_username)
+  raw_site_data <- fetchagol::fetchRawData(site_url, agol_username)
 
-    if ("exceededTransferLimit" %in% names(content)) {
-      exc_transfer <- content$exceededTransferLimit
-    } else {
-      exc_transfer <- FALSE
-    }
+  # Add site data to list of other data
+  raw_data$data$AllSites <- raw_site_data$data$`MOJN Aspen Sites Master`
+  # Join site metadata to list of other metadata
+  raw_data$metadata$AllSites <- raw_site_data$metadata$`MOJN Aspen Sites Master`
 
-    if (geometry) {
-      partial_result <- cbind(content$features$attributes, content$features$geometry) %>%
-        dplyr::mutate(wkid = content$spatialReference$wkid) %>%
-        tibble::as_tibble()
-    } else {
-      partial_result <- tibble::as_tibble(content$features$attributes)
-    }
-    result <- rbind(result, partial_result)
-    offset <- nrow(result)
-    qry$resultOffset <- offset
-  }
-  return(result)
-}
+  raw_data <- fetchagol::cleanData(raw_data)
 
-#' Fetch metadata from AGOL
-#'
-#' Retrieves metadata from AGOL layers and tables.
-#'
-#' @param url Feature service URL
-#' @param layer_number Optional layer ID
-#' @param token Authentication token (not needed for public layers)
-#'
-#' @return A list
-#' @export
-#'
-fetchMetadata <- function(url, token, layer_number) {
+  flattened_data <- list(data = list(),
+                         metadata = list())
 
-  if (!missing(layer_number)) {
-    url <- paste0(url, "/", layer_number, "/metadata")
-  } else {
-    url <- paste0(url, "/info/metadata")
-  }
 
-  # Get metadata
-  if (!missing(token)) {
-    resp <- httr::GET(url,
-                      query = list(token = token$token,
-                                   format = "fgdc",
-                                   f = "xml"))
-  } else {
-    resp <- httr::GET(url)
-  }
-  content <- httr::content(resp, type = "text/xml", encoding = "UTF-8")
-  metadata <- xml2::as_list(content)
-  metadata <- wrangleLayerMetadata(metadata$metadata, token)
+  # Join background site information with other tables
+  flattened_data$data$AllSites <- raw_data$data$AllSites
+  flattened_data$data$SiteVisit <- raw_data$data$SiteVisit
 
-  return(metadata)
-}
+  flattened_data$data$Disturbances <- raw_data$data$SiteVisit %>%
+    dplyr::mutate(parentglobalid = globalid) %>%
+    dplyr::select(parentglobalid, Park, Site, VisitType, VisitDate, FieldSeason) %>%
+    dplyr::right_join(raw_data$data$Disturbances, by = c("parentglobalid" = "parentglobalid"))
 
-wrangleMetadata <- function(raw_meta) {
-  meta <- lapply(raw_meta$eainfo, function(entity) {
-    table_name <- entity$detailed$enttyp$enttypl[[1]]
-    item_meta <- list(table_name = list(table_description = entity$detailed$enttyp$enttypd[[1]]))
-  })
-}
+  flattened_data$data$Observations <- raw_data$data$SiteVisit %>%
+    dplyr::mutate(parentglobalid = globalid) %>%
+    dplyr::select(parentglobalid, Park, Site, VisitType, VisitDate, FieldSeason) %>%
+    dplyr::right_join(raw_data$data$Observations, by = c("parentglobalid" = "parentglobalid"))
 
-wrangleLayerMetadata <- function(raw_meta, token) {
-  # Field level metadata
-  fields <- lapply(raw_meta$eainfo$detailed[2:length(raw_meta$eainfo$detailed)], function(field) {
-    field_name <- field$attrlabl[[1]]
-    desc <- parseAttrDef(field$attrdef[[1]])
-    try({
-      lookup_name <- trimws(field$attrdomv$codesetd$codesetn[[1]])
-      lookup_url <- trimws(field$attrdomv$codesetd$codesets[[1]])
-      lookup_df <- fetchHostedCSV(stringr::str_remove(lookup_url, "^.*?id="), token)
-      desc$lookup <- list(lookup_name = lookup_name,
-                          lookup_url = lookup_url,
-                          lookup_df = lookup_df)
-    }, silent = TRUE)
-    item_meta <- list()
-    item_meta[[field_name]] <- desc
-    return(item_meta)
-  })
+  flattened_data$data$Pests <- flattened_data$data$Observations %>%
+    dplyr::mutate(parentglobalid = globalid) %>%
+    dplyr::select(parentglobalid, Park, Site, VisitType, VisitDate, FieldSeason, SpeciesCode) %>%
+    dplyr::right_join(raw_data$data$Pests, by = c("parentglobalid" = "parentglobalid"))
 
-  # simplify list
-  fields <- purrr::flatten(fields)
+  # TODO Figure out what to do with metadata, in iu its left empty
+  flattened_data$metadata <- raw_data$metadata
 
-  # Table level metadata
-  table_name <- trimws(raw_meta$eainfo$detailed[1]$enttyp$enttypl[[1]])
-  table_desc <- trimws(raw_meta$eainfo$detailed[1]$enttyp$enttypd[[1]])
 
-  meta <- list(table_name = table_name,
-               table_description = table_desc,
-               fields = fields)
-
-  return(meta)
+  return(flattened_data)
 }
 
-parseAttrDef <- function(def) {
-  attrs <- list()
-  if (!is.null(def)) {
-    description <- trimws(stringr::str_remove_all(def, "\\{.*\\}"))
-  } else {
-    description <- NA
-  }
-
-  if (any(grepl("\\{.*\\}", def))) {
-    starts <- stringr::str_locate_all(def, "\\{")[[1]][, 1]
-    ends <- stringr::str_locate_all(def, "\\}")[[1]][, 1]
-
-    for (i in 1:length(starts)) {
-      start <- starts[i] + 1
-      end <- ends[i] - 1
-      name_value <- trimws(strsplit(substr(def, start, end), ":")[[1]])
-      attrs[[name_value[1]]] <- name_value[2]
+formatMetadataAsEML <- function(meta, token) {
+  attrs <- sapply(names(meta), function(attr_name) {
+    meta_list <- meta[[attr_name]]
+    if (meta_list$attributes$class == "date") {
+      format_string <- "YYYY-MM-DD"
+    } else if (meta_list$attributes$class == "dateTime") {
+      format_string <- "YYYY-MM-DDThh:mm:ss"
+    } else {
+      format_string <- NA
     }
-  }
-  def_list <- list(description = description,
-                   attributes = attrs)
-
-  return(def_list)
-}
-
-#' Fetch feature service info from AGOL
-#'
-#' Retrieves metadata from AGOL layers and tables.
-#'
-#' @param url Feature service URL
-#' @param token Authentication token (not needed for public layers)
-#'
-#' @return A list
-#' @export
-#'
-fetchLayerAndTableList <- function(url, token) {
-
-  qry <- list(f = "json")
-
-  # Get feature service info
-  if (!missing(token)) {
-    qry$token <- token$token
-  }
-
-  resp <- httr::GET(url,
-                    query = qry)
-
-  content <- httr::content(resp, type = "text/json", encoding = "UTF-8")
-  feature_service <- jsonlite::fromJSON(content)
-
-  # TODO: there are no aspen feature layers in the aspen database, but there is one separate feature layer that we will need to import later
-
-  # Get layer id's and names
-  if (hasName(feature_service, "layers") & length(feature_service$layers) > 0) {
-    layers <- dplyr::select(feature_service$layers, id, name)
-  } else {
-    layers <- tibble::tibble(.rows = 0)
-  }
-
-  # Get table id's and names
-  if (hasName(feature_service, "tables") & length(feature_service$tables) > 0) {
-    tables <- dplyr::select(feature_service$tables, id, name)
-  } else {
-    tables <- tibble::tibble(.rows = 0)
-  }
-
-  layers_tables <- rbind(layers, tables)
-  # layers_tables <- layers
+    attributes_df <- tibble::tibble(attributeName = attr_name,
+                                    attributeDefinition = meta_list$description,
+                                    unit = c(meta_list$attributes$unit, NA)[1],
+                                    class = meta_list$attributes$class,
+                                    dateTimeFormatString = format_string,
+                                    missingValueCode = c(meta_list$attributes$missing_value_code, NA)[1],
+                                    missingValueCodeExplanation = c(meta_list$attributes$missing_value_code_exp, NA)[1])
+
+    return(attrs)
+  }, simplify = TRUE)
+
+  catvars <- sapply(names(meta), function(attr_name) {
+    if (length(meta_list$lookup$lookup_url) > 0) {
+      catvars_df <- meta_list$lookup$lookup_df %>%
+        dplyr::mutate(attributeName = attr_name) %>%
+        dplyr::select(attributeName, code = name, definition = description)
+    } else {
+      catvars_df <- NULL
+    }
+    return(catvars_df)
+  }, simplify = TRUE)
 
-  return(layers_tables)
 }
 
-fetchRawAspen <- function(aspen_database_url = "https://services1.arcgis.com/fBc8EJBxQRMcHlei/arcgis/rest/services/MOJN_Aspen_Test_Visit_NonSpatial_gdb/FeatureServer", agol_username = "mojn_data", agol_password = keyring::key_get(service = "AGOL", username = agol_username)) {
-  token <- fetchAGOLToken(agol_username, agol_password)
-  layers_tables <- fetchLayerAndTableList(aspen_database_url, token)
-  ids <- layers_tables$id
-  names(ids) <- layers_tables$name
 
-  metadata <- sapply(ids, function(id) {
-    meta <- fetchMetadata(aspen_database_url, token, id)
-    meta[["table_id"]] <- id
-    return(meta)
-  }, simplify = FALSE, USE.NAMES = TRUE)
-
-  data <- sapply(metadata, function(meta){
-    data_table <- fetchAllRecords(aspen_database_url, meta$table_id, token, outFields = paste(names(meta$fields), collapse = ",")) %>%
-      dplyr::select(dplyr::any_of(names(meta$fields)))
-    return(data_table)
-  }, simplify = FALSE, USE.NAMES = TRUE)
-
-  raw_data <- list(data = data,
-                   metadata = metadata)
-  return(raw_data)
-}
+# fetchHostedCSV <- function(item_id, token, root = "nps.maps.arcgis.com") {
+#   url <- paste0("https://", root, "/sharing/rest/content/items/", item_id, "/data")
+#   resp <- httr::GET(url, query = list(token = token$token))
+#   content <- httr::content(resp, type = "text/csv", encoding = "UTF-8")
+#
+#   return(content)
+# }
diff --git a/man/fetchAGOLToken.Rd b/man/fetchAGOLToken.Rd
deleted file mode 100644
index 07e98d2..0000000
--- a/man/fetchAGOLToken.Rd
+++ /dev/null
@@ -1,28 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/utils.R
-\name{fetchAGOLToken}
-\alias{fetchAGOLToken}
-\title{Get AGOL authentication token}
-\usage{
-fetchAGOLToken(
-  agol_username,
-  agol_password = keyring::key_get(service = "AGOL", username = agol_username),
-  root = "nps.maps.arcgis.com",
-  referer = "https://irma.nps.gov"
-)
-}
-\arguments{
-\item{agol_username}{AGOL headless account username}
-
-\item{agol_password}{AGOL headless account password (do not hard code this into your scripts!)}
-
-\item{root}{NPS users should keep the default. See \url{https://developers.arcgis.com/rest/users-groups-and-items/root.htm} for more information.}
-
-\item{referer}{NPS users should keep the default. See \url{https://developers.arcgis.com/rest/users-groups-and-items/generate-token.htm} for more information.}
-}
-\value{
-An AGOL authentication token
-}
-\description{
-Treat this token as you would a password: don't hard-code it in your scripts or save it to a file. It will expire after 60 minutes.
-}
diff --git a/man/fetchAllRecords.Rd b/man/fetchAllRecords.Rd
deleted file mode 100644
index 56e4a68..0000000
--- a/man/fetchAllRecords.Rd
+++ /dev/null
@@ -1,34 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/utils.R
-\name{fetchAllRecords}
-\alias{fetchAllRecords}
-\title{Fetch tabular data from AGOL}
-\usage{
-fetchAllRecords(
-  data_path,
-  layer_number,
-  token,
-  geometry = FALSE,
-  where = "1=1",
-  outFields = "*"
-)
-}
-\arguments{
-\item{data_path}{Feature service URL}
-
-\item{layer_number}{Layer number}
-
-\item{token}{Authentication token (not needed for public layers)}
-
-\item{geometry}{Include spatial data columns? Works with points, not tested with other geometry types}
-
-\item{where}{Query clause specifying a subset of rows (optional; defaults to all rows). See AGOL REST API documentation.}
-
-\item{outFields}{String indicating which fields to return (optional; defaults to all fields). See AGOL REST API documentation.}
-}
-\value{
-A tibble
-}
-\description{
-Retrieves tabular data from AGOL layers and tables, even when number of rows exceeds maximum record count.
-}
diff --git a/man/fetchAndWrangleAspen.Rd b/man/fetchAndWrangleAspen.Rd
new file mode 100644
index 0000000..b6f32a2
--- /dev/null
+++ b/man/fetchAndWrangleAspen.Rd
@@ -0,0 +1,25 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/utils.R
+\name{fetchAndWrangleAspen}
+\alias{fetchAndWrangleAspen}
+\title{Fetch feature service info from AGOL}
+\usage{
+fetchAndWrangleAspen(
+  aspen_url =
+    "https://services1.arcgis.com/fBc8EJBxQRMcHlei/arcgis/rest/services/MOJN_Aspen_Test_Visit_NonSpatial_gdb/FeatureServer",
+  site_url =
+    "https://services1.arcgis.com/fBc8EJBxQRMcHlei/arcgis/rest/services/AspenSites2/FeatureServer",
+  agol_username = "mojn_data"
+)
+}
+\arguments{
+\item{url}{Feature service URL}
+
+\item{token}{Authentication token (not needed for public layers)}
+}
+\value{
+A list
+}
+\description{
+Retrieves metadata from AGOL layers and tables.
+}
diff --git a/man/fetchLayerAndTableList.Rd b/man/fetchLayerAndTableList.Rd
deleted file mode 100644
index 15261de..0000000
--- a/man/fetchLayerAndTableList.Rd
+++ /dev/null
@@ -1,19 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/utils.R
-\name{fetchLayerAndTableList}
-\alias{fetchLayerAndTableList}
-\title{Fetch feature service info from AGOL}
-\usage{
-fetchLayerAndTableList(url, token)
-}
-\arguments{
-\item{url}{Feature service URL}
-
-\item{token}{Authentication token (not needed for public layers)}
-}
-\value{
-A list
-}
-\description{
-Retrieves metadata from AGOL layers and tables.
-}
diff --git a/man/fetchMetadata.Rd b/man/fetchMetadata.Rd
deleted file mode 100644
index 54215e5..0000000
--- a/man/fetchMetadata.Rd
+++ /dev/null
@@ -1,21 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/utils.R
-\name{fetchMetadata}
-\alias{fetchMetadata}
-\title{Fetch metadata from AGOL}
-\usage{
-fetchMetadata(url, token, layer_number)
-}
-\arguments{
-\item{url}{Feature service URL}
-
-\item{token}{Authentication token (not needed for public layers)}
-
-\item{layer_number}{Optional layer ID}
-}
-\value{
-A list
-}
-\description{
-Retrieves metadata from AGOL layers and tables.
-}

From 6895378b362b40f55b426199d73beaf37e3ffd43 Mon Sep 17 00:00:00 2001
From: Irene Foster <ifoster@nps.gov>
Date: Tue, 13 Feb 2024 14:55:15 -0800
Subject: [PATCH 06/11] add begining of quality control and visualizations

---
 R/aspen_qc.R                                  |  52 ++
 scripts/visualizations/aspen_qc_results.qmd   |  63 ++
 .../visualizations/aspen_visualizations.qmd   | 823 ++++++++++++++++++
 3 files changed, 938 insertions(+)
 create mode 100644 R/aspen_qc.R
 create mode 100644 scripts/visualizations/aspen_qc_results.qmd
 create mode 100644 scripts/visualizations/aspen_visualizations.qmd

diff --git a/R/aspen_qc.R b/R/aspen_qc.R
new file mode 100644
index 0000000..c7750ad
--- /dev/null
+++ b/R/aspen_qc.R
@@ -0,0 +1,52 @@
+#' @importFrom magrittr %>% %<>%
+
+#' Check that there is at least one tree for every observation entry and flag entries with more than 250 trees
+missingTreeQC <- function(data = fetchAndWrangleAspen()){
+
+  missingTreeQC <- data$data$Observations %>%
+    # Add up all trees in a row
+    dplyr::mutate(totalTreeCount = Class1+Class2+Class3+Class4+Class5+Class6) %>%
+    # Filter for any rows that have less than one trees or more than 250 trees
+    dplyr::filter(totalTreeCount < 1 | totalTreeCount > 250)
+
+  return(missingTreeQC)
+}
+
+#' Check that no entries in the observation table have a SpeciesCode that is NA or missing
+treeSpeciesQC <- function(data = fetchAndWrangleAspen()){
+
+  treeSpeciesQC <- data$data$Observations %>%
+    dplyr::filter(SpeciesCode == "" | is.na(SpeciesCode) | SpeciesCode == "NA")
+
+  return(treeSpeciesQC)
+}
+
+#' Check that unknown species entries do not have any live trees
+unknownSpeciesQC <- function(data = fetchAndWrangleAspen()){
+
+  unknownSpeciesQC <- data$data$Observations %>%
+    dplyr::filter(SpeciesCode == "UNK") %>%
+    dplyr::filter(Class1 != 0 | Class2 != 0 | Class3 != 0 | Class4 != 0 | Class5 != 0)
+
+  return(unknownSpeciesQC)
+}
+
+#' Check that all of the plots have at least one aspen each time they are visited
+checkAspenQC <- function(data = fetchAndWrangleAspen()){
+
+  checkAspenQC <- data$data$Observations %>%
+    # Filter for aspen trees
+    dplyr::filter(SpeciesCode == "POTR5") %>%
+    # Join observations and visits
+    dplyr::full_join(data$data$SiteVisit, by = c("parentglobalid" = "globalid")) %>%
+    # Filter data for any visits that don't have aspen observations
+    dplyr::filter(is.na(SpeciesCode))
+
+  return(checkAspenQC)
+}
+
+
+
+
+
+
diff --git a/scripts/visualizations/aspen_qc_results.qmd b/scripts/visualizations/aspen_qc_results.qmd
new file mode 100644
index 0000000..b2c9146
--- /dev/null
+++ b/scripts/visualizations/aspen_qc_results.qmd
@@ -0,0 +1,63 @@
+---
+title: "Aspen Quality Control"
+format: 
+  html:
+    embed-resources: true
+execute: 
+  echo: false
+  message: false
+  warning: false
+---
+
+```{r}
+library(tidyverse)
+library(aspen)
+```
+
+
+##### Check Trees
+Check that there is at least one tree for every observation entry and flag entries with more than 250 trees
+
+```{r}
+if(nrow(aspen:::missingTreeQC()) == 0){
+  cat('No errors')
+} else{
+  rmarkdown::paged_table(aspen:::missingTreeQC())
+}
+```
+
+
+##### Check for Missing Species
+Check that no entries in the observation table have a SpeciesCode that is NA or missing
+
+```{r}
+if(nrow(aspen:::treeSpeciesQC()) == 0){
+  cat('No errors')
+} else{
+  rmarkdown::paged_table(aspen:::treeSpeciesQC())
+}
+```
+
+
+##### Check Unknown Trees
+Check that unknown species entries do not have any live trees
+
+```{r}
+if(nrow(aspen:::unknownSpeciesQC()) == 0){
+  cat('No errors')
+} else{
+  rmarkdown::paged_table(aspen:::unknownSpeciesQC())
+}
+```
+
+
+##### Check for Aspens
+Check that all of the plots have at least one aspen each time they are visited
+
+```{r}
+if(nrow(aspen:::checkAspenQC()) == 0){
+  cat('No errors')
+} else{
+  rmarkdown::paged_table(aspen:::checkAspenQC())
+}
+```
diff --git a/scripts/visualizations/aspen_visualizations.qmd b/scripts/visualizations/aspen_visualizations.qmd
new file mode 100644
index 0000000..4338e6e
--- /dev/null
+++ b/scripts/visualizations/aspen_visualizations.qmd
@@ -0,0 +1,823 @@
+---
+title: "Aspen Preliminary Visualizations"
+format: 
+  html:
+    embed-resources: true
+execute: 
+  echo: false
+  message: false
+  warning: false
+---
+
+```{r, message = FALSE}
+library(tidyverse)
+library(khroma)
+aspen <- aspen::fetchAndWrangleAspen()
+```
+
+```{r}
+# find number of plots in each watershed
+
+# TODO: update this to Allsites tables once not used sites are removed
+
+aspen$data$AllSites <- aspen$data$AllSites %>%
+  filter(Site %in% aspen$data$SiteVisit$Site) %>%
+      # Find Number of plot in each watershed
+  group_by(Stratum) %>%
+  mutate(numberOfPlotsInWatershed = n()) %>%
+  ungroup() %>%
+  group_by(Community) %>%
+  # Find number of plots in each community
+  mutate(numberOfPlotsInCommunity = n()) %>%
+  ungroup()
+```
+
+
+# Vegetation Community
+
+#### Density of Live Aspens
+
+```{r}
+# 4 meter radius of plots
+# area = pi*r^2
+# density = #/ha
+
+aspenDensity <- aspen$data$Observations %>%
+  left_join(select(aspen$data$AllSites, any_of(c("Site", "Community", "numberOfPlotsInCommunity"))), by = join_by(Site)) %>%
+  # filter for POTR5
+  filter(SpeciesCode == "POTR5") %>%
+  # Add up LIVE POTR5 for each row
+  mutate(totalLiveTrees = Class1+Class2+Class3+Class4+Class5) %>%
+  # summarize total POTR5 divided by area converted to ha (area = (number of plots * pi * r^2)/10000 )
+  group_by(Community) %>%
+  summarise(density = sum(totalLiveTrees)/((first(numberOfPlotsInCommunity) * pi * 4^2)/10000),
+            sumLiveTrees = sum(totalLiveTrees),
+            numberOfPlotsInCommunity = first(numberOfPlotsInCommunity))
+
+
+aspenDensityGraph <- aspenDensity %>%
+  ggplot(aes(x= Community, y = density)) +
+  geom_col() +
+  theme_minimal() +
+  labs(title = "Aspen Density",
+       x = "Community", y = "Density (#/ha)") + 
+  theme(plot.title = element_text(hjust = 0.5)) +
+  theme(axis.text.x = element_text(size = 6, angle = 315))
+
+plotly::ggplotly(aspenDensityGraph)
+```
+
+#### Density of Live Aspens per Plot
+
+```{r}
+# 4 meter radius of plots
+# area = pi*r^2
+# density = #/ha
+
+aspenDensity <- aspen$data$Observations %>%
+  left_join(select(aspen$data$AllSites, any_of(c("Site", "Community", "numberOfPlotsInCommunity"))), by = join_by(Site)) %>%
+  # filter for POTR5
+  filter(SpeciesCode == "POTR5") %>%
+  # Find density for each plot
+  mutate(density = (Class1+Class2+Class3+Class4+Class5)/((pi * 4^2)/10000))
+
+
+aspenDensityGraph <- aspenDensity %>%
+  ggplot(aes(x= Community, y = density)) +
+  geom_violin() +
+  #geom_col() +
+  theme_minimal() +
+  labs(title = "Aspen Density",
+       x = "Community", y = "Density (#/ha)") + 
+  theme(plot.title = element_text(hjust = 0.5)) +
+  theme(axis.text.x = element_text(size = 6, angle = 315, hjust = 0.0))
+
+aspenDensityGraph
+```
+
+#### Density of Conifers
+
+```{r}
+coniferDensity <- aspen$data$Observations %>%
+  left_join(select(aspen$data$AllSites, any_of(c("Site", "Community", "numberOfPlotsInCommunity"))), by = join_by(Site)) %>%
+  # Find Number of plot in each watershed
+  # group_by(Community) %>%
+  # mutate(numberOfPlots = n_distinct(Site)) %>%
+  # ungroup() %>%
+  # filter for POTR5
+  filter(SpeciesCode != "POTR5" & SpeciesCode != "BEUC2") %>%
+  # Add up LIVE POTR5 for each row
+  mutate(totalLiveTrees = Class1+Class2+Class3+Class4+Class5) %>%
+  # summarize total POTR5 divided by area converted to ha (area = (number of plots * pi * r^2)/10000 )
+  group_by(Community) %>%
+  summarise(density = sum(totalLiveTrees)/((first(numberOfPlotsInCommunity) * pi * 4^2)/10000),
+            sumLiveTrees = sum(totalLiveTrees),
+            numberOfPlotsInCommunity = first(numberOfPlotsInCommunity))
+
+
+coniferDensityGraph <- coniferDensity %>%
+  ggplot(aes(x= Community, y = density)) +
+  geom_col() +
+  theme_minimal() +
+  labs(title = "Conifer Density",
+       x = "Community", y = "Density (#/ha)") + 
+  theme(plot.title = element_text(hjust = 0.5)) +
+  theme(axis.text.x = element_text(size = 6, angle = 315))
+
+plotly::ggplotly(coniferDensityGraph)
+```
+
+
+#### Density of Conifers per Plot
+
+```{r}
+# 4 meter radius of plots
+# area = pi*r^2
+# density = #/ha
+
+# TODO: should i aggregate all the different types of conifers per plot before graphing?? - probably yea i think so
+
+coniferDensity <- aspen$data$Observations %>%
+  left_join(select(aspen$data$AllSites, any_of(c("Site", "Community", "numberOfPlotsInCommunity"))), by = join_by(Site)) %>%
+  # filter for POTR5
+  filter(SpeciesCode != "POTR5" & SpeciesCode != "BEUC2") %>%
+  # Find total live trees in each row
+  mutate(treeSum = Class1+Class2+Class3+Class4+Class5) %>%
+  group_by(Site, Community) %>%
+  summarise(totalTrees = sum(treeSum)) %>%
+  # Find density for each plot
+  mutate(density = (totalTrees)/((pi * 4^2)/10000))
+
+
+coniferDensityGraph <- coniferDensity %>%
+  ggplot(aes(x= Community, y = density, fill = Community)) +
+  geom_violin() +
+  #geom_col() +
+  theme_minimal() +
+  labs(title = "Conifer Density",
+       x = "Community", y = "Density (#/ha)") + 
+  theme(plot.title = element_text(hjust = 0.5)) +
+  theme(axis.text.x = element_text(size = 6, angle = 315, hjust = 0)) +
+  scale_fill_light() + 
+  guides(fill="none")
+
+coniferDensityGraph
+```
+
+#### Density of Aspen vs Conifers
+
+```{r}
+compareDensity <- aspen$data$Observations %>%
+  left_join(select(aspen$data$AllSites, any_of(c("Site", "Community", "numberOfPlotsInCommunity"))), by = join_by(Site)) %>%
+    # filter out one species that is not aspen or conifer
+  filter(SpeciesCode != "BEUC2") %>%
+  mutate(isAspen = case_when(
+    SpeciesCode == "POTR5" ~ 'Aspen',
+    .default = 'Conifer'
+  )) %>%
+  # Add up live trees for each row
+  mutate(liveTrees = Class1+Class2+Class3+Class4+Class5) %>%
+  # summarize total POTR5 divided by area converted to ha (area = (number of plots * pi * r^2)/10000 )
+  group_by(Community, isAspen) %>%
+  summarise(density = sum(liveTrees)/((first(numberOfPlotsInCommunity) * pi * 4^2)/10000),
+            totalLiveTrees = sum(liveTrees),
+            numberOfPlotsInCommunity = first(numberOfPlotsInCommunity))
+
+
+compareDensityGraph <- compareDensity %>%
+  ggplot(aes(x= Community, y = density, fill = as.factor(isAspen))) +
+  geom_col(position = position_dodge(preserve = "single"), stat = "identity") +
+  theme_minimal() +
+  labs(title = "Aspen and Conifer Density",
+       x = "Community", y = "Density (#/ha)",
+       fill = "Species") + 
+  theme(plot.title = element_text(hjust = 0.5)) +
+  theme(axis.text.x = element_text(size = 6, angle = 315))  +
+  scale_fill_light()  +
+  theme(axis.text.x = element_text(size = 6, angle = 315))
+
+plotly::ggplotly(compareDensityGraph)
+```
+
+
+#### Density of Aspen vs Conifers per Plot
+
+```{r}
+compareDensity <- aspen$data$Observations %>%
+  left_join(select(aspen$data$AllSites, any_of(c("Site", "Community", "numberOfPlotsInCommunity"))), by = join_by(Site)) %>%
+    # filter out one species that is not aspen or conifer
+  filter(SpeciesCode != "BEUC2") %>%
+  mutate(isAspen = case_when(
+    SpeciesCode == "POTR5" ~ 'Aspen',
+    .default = 'Conifer'
+  )) %>%
+    # Find density for each plot
+  mutate(density = (Class1+Class2+Class3+Class4+Class5)/((pi * 4^2)/10000))
+  # # Add up live trees for each row
+  # mutate(liveTrees = Class1+Class2+Class3+Class4+Class5) %>%
+  # # summarize total POTR5 divided by area converted to ha (area = (number of plots * pi * r^2)/10000 )
+  # group_by(Community, isAspen) %>%
+  # summarise(density = sum(liveTrees)/((first(numberOfPlotsInCommunity) * pi * 4^2)/10000),
+  #           totalLiveTrees = sum(liveTrees),
+  #           numberOfPlotsInCommunity = first(numberOfPlotsInCommunity))
+
+
+compareDensityGraph <- compareDensity %>%
+  ggplot(aes(x= Community, y = density, fill = as.factor(isAspen))) +
+  geom_boxplot() +
+  #geom_col(position = position_dodge(preserve = "single"), stat = "identity") +
+  theme_minimal() +
+  labs(title = "Aspen and Conifer Density",
+       x = "Community", y = "Density (#/ha)",
+       fill = "Species") + 
+  theme(plot.title = element_text(hjust = 0.5)) +
+  theme(axis.text.x = element_text(size = 6, angle = 315))  +
+  scale_fill_light()  +
+  theme(axis.text.x = element_text(size = 6, angle = 315, hjust = 0))
+
+compareDensityGraph
+```
+
+#### Density of Aspen Age Groups
+
+```{r}
+ageGroupCommunity <- aspen$data$Observations %>%
+  left_join(select(aspen$data$AllSites, any_of(c("Site", "Community", "numberOfPlotsInCommunity"))), by = join_by(Site)) %>%
+  # filter for POTR5
+  filter(SpeciesCode == "POTR5") %>%
+  # Pivot longer to make calculations easier
+  pivot_longer(cols = Class1:Class6, names_to = "class", values_to = "treeCount") %>%
+  # summarize POTR5 for each age group divided by area converted to ha (area = (number of plots * pi * r^2)/10000 )
+  group_by(Community, class) %>%
+  summarise(density = sum(treeCount)/((first(numberOfPlotsInCommunity) * pi * 4^2)/10000),
+            sumLiveTrees = sum(treeCount),
+            numberOfPlotsInCommunity = first(numberOfPlotsInCommunity)) 
+
+
+ageGroupCommunityGraph <- ageGroupCommunity %>%
+  ggplot(aes(x= Community, y = density, fill = factor(class))) +
+  geom_col(position = position_dodge(preserve = "single"), stat = "identity") +
+  theme_minimal() +
+  labs(title = "Age Class Density",
+       x = "Community", y = "Density (#/ha)",
+       fill = "Age Class") + 
+  theme(plot.title = element_text(hjust = 0.5)) +
+  scale_fill_light()  +
+  theme(axis.text.x = element_text(size = 6, angle = 315))
+
+plotly::ggplotly(ageGroupCommunityGraph)
+```
+
+
+#### Density of Aspen Age Groups Boxplot
+
+```{r}
+ageGroupCommunity <- aspen$data$Observations %>%
+  left_join(select(aspen$data$AllSites, any_of(c("Site", "Community", "numberOfPlotsInCommunity"))), by = join_by(Site)) %>%
+  # filter for POTR5
+  filter(SpeciesCode == "POTR5") %>%
+  # Pivot longer to make calculations easier
+  pivot_longer(cols = Class1:Class6, names_to = "class", values_to = "treeCount") %>%
+  mutate(density = treeCount/(pi * 4^2/10000))
+  # # summarize POTR5 for each age group divided by area converted to ha (area = (number of plots * pi * r^2)/10000 )
+  # group_by(Community, class) %>%
+  # summarise(density = sum(treeCount)/((first(numberOfPlotsInCommunity) * pi * 4^2)/10000),
+  #           sumLiveTrees = sum(treeCount),
+  #           numberOfPlotsInCommunity = first(numberOfPlotsInCommunity)) 
+
+
+ageGroupCommunityGraph <- ageGroupCommunity %>%
+  ggplot(aes(x= Community, y = density, fill = factor(class))) +
+  geom_boxplot() +
+  #geom_col(position = position_dodge(preserve = "single"), stat = "identity") +
+  theme_minimal() +
+  labs(title = "Age Class Density",
+       x = "Community", y = "Density (#/ha)",
+       fill = "Age Class") + 
+  theme(plot.title = element_text(hjust = 0.5)) +
+  scale_fill_light()  +
+  theme(axis.text.x = element_text(size = 6, angle = 315))
+
+#plotly::ggplotly(ageGroupCommunityGraph)
+ageGroupCommunityGraph
+```
+
+#### Density of Aspen Age Groups Line Graph
+
+```{r}
+ageGroupCommunity <- aspen$data$Observations %>%
+  left_join(select(aspen$data$AllSites, any_of(c("Site", "Community", "numberOfPlotsInCommunity"))), by = join_by(Site)) %>%
+  # filter for POTR5
+  filter(SpeciesCode == "POTR5") %>%
+  # Pivot longer to make calculations easier
+  pivot_longer(cols = Class1:Class6, names_to = "class", values_to = "treeCount") %>%
+  # summarize POTR5 for each age group divided by area converted to ha (area = (number of plots * pi * r^2)/10000 )
+  group_by(Community, class) %>%
+  summarise(density = sum(treeCount)/((first(numberOfPlotsInCommunity) * pi * 4^2)/10000),
+            sumLiveTrees = sum(treeCount),
+            numberOfPlotsInCommunity = first(numberOfPlotsInCommunity)) 
+
+
+ageGroupCommunityGraph <- ageGroupCommunity %>%
+  ggplot(aes(x= class, y = density, color = factor(Community))) +
+  geom_line(aes(group = factor(Community))) + 
+  #geom_col(position = position_dodge(preserve = "single"), stat = "identity") +
+  theme_minimal() +
+  labs(title = "Age Class Density",
+       x = "Age Class", y = "Density (#/ha)",
+       color = "Community") + 
+  theme(plot.title = element_text(hjust = 0.5)) +
+  scale_fill_light() + 
+  guides(color="none")
+# +
+#   theme(axis.text.x = element_text(size = 6, angle = 315))
+
+#ageGroupCommunityGraph
+plotly::ggplotly(ageGroupCommunityGraph)
+```
+
+#### Frequency of Disturbances
+
+```{r}
+# Area of plots in which a disturbance occurs / total area of plots (for each community)
+
+disturbancesFrequency <- aspen$data$Disturbances %>%
+  group_by(Site) %>%
+  # Group data so every site with a disturbance has only one entry
+  summarise(Disturbance = first(Disturbance)) %>%
+  # Join disturbance and all site data
+  full_join(select(aspen$data$AllSites, any_of(c("Site", "Community", "numberOfPlotsInCommunity"))), by = join_by(Site)) %>%
+  group_by(Community) %>%
+  # Find out how many plots in a watershed were disturbed
+  summarise(plotsWithDisturbances = sum(!is.na(Disturbance)), 
+            numberOfPlotsInCommunity = first(numberOfPlotsInCommunity),
+            frequency = (first(plotsWithDisturbances) * pi * 4^2)/(first(numberOfPlotsInCommunity) * pi * 4^2)*100)
+
+disturbancesGraph <- disturbancesFrequency %>%
+  ggplot(aes(x = Community, y = frequency)) +
+  geom_col() +
+  theme_minimal() +
+  labs(title = "Disturbance Frequency",
+       x = "Community", y = "Frequency") + 
+  theme(plot.title = element_text(hjust = 0.5)) +
+  theme(axis.text.x = element_text(size = 6, angle = 315))
+
+plotly::ggplotly(disturbancesGraph)
+```
+
+
+#### Frequency of Disturbances by Plot
+
+```{r}
+# Area of plots in which a disturbance occurs / total area of plots (for each community)
+
+disturbancesFrequency <- aspen$data$Disturbances %>%
+  group_by(Site) %>%
+  # Group data so every site with a disturbance has only one entry
+  summarise(Disturbance = first(Disturbance)) %>%
+  # Join disturbance and all site data
+  full_join(select(aspen$data$AllSites, any_of(c("Site", "Community", "numberOfPlotsInCommunity"))), by = join_by(Site)) %>%
+  group_by(Community) %>%
+  # Find out how many plots in a watershed were disturbed
+  summarise(plotsWithDisturbances = sum(!is.na(Disturbance)), 
+            numberOfPlotsInCommunity = first(numberOfPlotsInCommunity),
+            frequency = (first(plotsWithDisturbances) * pi * 4^2)/(first(numberOfPlotsInCommunity) * pi * 4^2)*100)
+
+disturbancesGraph <- disturbancesFrequency %>%
+  ggplot(aes(x = Community, y = frequency)) +
+  geom_col() +
+  theme_minimal() +
+  labs(title = "Disturbance Frequency",
+       x = "Community", y = "Frequency") + 
+  theme(plot.title = element_text(hjust = 0.5)) +
+  theme(axis.text.x = element_text(size = 6, angle = 315))
+
+plotly::ggplotly(disturbancesGraph)
+```
+
+#### Frequency of Pests
+
+```{r}
+# Area of plots in which a disturbance occurs / total area of plots (for each community)
+
+pestFrequency <- aspen$data$Pests %>%
+  group_by(Site) %>%
+  # Group data so every site with a disturbance has only one entry
+  summarise(Pest = first(Pest)) %>%
+  # Join disturbance and all site data
+  full_join(select(aspen$data$AllSites, any_of(c("Site", "Community", "numberOfPlotsInCommunity"))), by = join_by(Site)) %>%
+  group_by(Community) %>%
+  # Find out how many plots in a watershed were disturbed
+  summarise(plotsWithPests = sum(!is.na(Pest)), 
+            numberOfPlotsInCommunity = first(numberOfPlotsInCommunity),
+            frequency = (first(plotsWithPests) * pi * 4^2)/(first(numberOfPlotsInCommunity) * pi * 4^2)*100)
+
+pestGraph <- pestFrequency %>%
+  ggplot(aes(x = Community, y = frequency)) +
+  geom_col() +
+  theme_minimal() +
+  labs(title = "Pest Frequency",
+       x = "Community", y = "Frequency") + 
+  theme(plot.title = element_text(hjust = 0.5)) +
+  theme(axis.text.x = element_text(size = 6, angle = 315))
+
+plotly::ggplotly(pestGraph)
+```
+
+
+# Watershed (Stratum)
+
+#### Density of Live Aspens
+
+```{r}
+# 4 meter radius of plots
+# area = pi*r^2
+# denisty = #/ha
+
+
+aspenDensity <- aspen$data$Observations %>%
+  left_join(select(aspen$data$AllSites, any_of(c("Site", "Stratum", "numberOfPlotsInWatershed"))), by = join_by(Site)) %>%
+  # Find Number of plot in each watershed
+  # group_by(Stratum) %>%
+  # mutate(numberOfPlots = n_distinct(Site)) %>%
+  # ungroup() %>%
+  # filter for POTR5
+  filter(SpeciesCode == "POTR5") %>%
+  # Add up LIVE POTR5 for each row
+  mutate(totalLiveTrees = Class1+Class2+Class3+Class4+Class5) %>%
+  # summarize total POTR5 divided by area converted to ha (area = (number of plots * pi * r^2)/10000 )
+  group_by(Stratum) %>%
+  summarise(density = sum(totalLiveTrees)/((first(numberOfPlotsInWatershed) * pi * 4^2)/10000),
+            sumLiveTrees = sum(totalLiveTrees),
+            numberOfPlotsInWatershed = first(numberOfPlotsInWatershed))
+
+
+aspenDensityGraph <- aspenDensity %>%
+  ggplot(aes(x= Stratum, y = density)) +
+  geom_col() +
+  theme_minimal() +
+  labs(title = "Aspen Density",
+       x = "Watershed", y = "Density (#/ha)") + 
+  theme(plot.title = element_text(hjust = 0.5))
+
+plotly::ggplotly(aspenDensityGraph)
+```
+
+
+#### Density of Conifers
+
+```{r}
+  
+coniferDensity <- aspen$data$Observations %>%
+  left_join(select(aspen$data$AllSites, any_of(c("Site", "Stratum", "numberOfPlotsInWatershed"))), by = join_by(Site)) %>%
+  # Find Number of plot in each watershed
+  # group_by(Stratum) %>%
+  # mutate(numberOfPlotsInWaterShed = n_distinct(Site)) %>%
+  # ungroup() %>%
+  # filter for POTR5
+  filter(SpeciesCode != "POTR5" & SpeciesCode != "BEUC2") %>%
+  # Add up LIVE POTR5 for each row
+  mutate(totalLiveTrees = Class1+Class2+Class3+Class4+Class5) %>%
+  # summarize total POTR5 divided by area converted to ha (area = (number of plots * pi * r^2)/10000 )
+  group_by(Stratum) %>%
+  summarise(density = sum(totalLiveTrees)/((first(numberOfPlotsInWatershed) * pi * 4^2)/10000),
+            sumLiveTrees = sum(totalLiveTrees),
+            numberOfPlotsInWatershed = first(numberOfPlotsInWatershed))
+
+
+coniferDensityGraph <- coniferDensity %>%
+  ggplot(aes(x= Stratum, y = density)) +
+  geom_col() +
+  theme_minimal() +
+  labs(title = "Conifer Density",
+       x = "Watershed", y = "Density (#/ha)") + 
+  theme(plot.title = element_text(hjust = 0.5))
+
+plotly::ggplotly(coniferDensityGraph)
+```
+
+
+#### Density of Aspen vs Conifers
+
+```{r}
+compareDensity <- aspen$data$Observations %>%
+  left_join(select(aspen$data$AllSites, any_of(c("Site", "Stratum", "numberOfPlotsInWatershed"))), by = join_by(Site)) %>%
+    # filter out one species that is not aspen or conifer
+  filter(SpeciesCode != "BEUC2") %>%
+  mutate(isAspen = case_when(
+    SpeciesCode == "POTR5" ~ 'Aspen',
+    .default = 'Conifer'
+  )) %>%
+  # Add up live trees for each row
+  mutate(liveTrees = Class1+Class2+Class3+Class4+Class5) %>%
+  # summarize total POTR5 divided by area converted to ha (area = (number of plots * pi * r^2)/10000 )
+  group_by(Stratum, isAspen) %>%
+  summarise(density = sum(liveTrees)/((first(numberOfPlotsInWatershed) * pi * 4^2)/10000),
+            totalLiveTrees = sum(liveTrees),
+            numberOfPlotsInWatershed = first(numberOfPlotsInWatershed))
+
+
+compareDensityGraph <- compareDensity %>%
+  ggplot(aes(x= Stratum, y = density, fill = as.factor(isAspen))) +
+  geom_col(position = position_dodge(preserve = "single"), stat = "identity") +
+  theme_minimal() +
+  labs(title = "Aspen and Conifer Density",
+       x = "Watershed", y = "Density (#/ha)",
+       fill = "Species") + 
+  theme(plot.title = element_text(hjust = 0.5)) +
+  scale_fill_light()
+
+plotly::ggplotly(compareDensityGraph)
+```
+
+
+#### Density of Aspen Age Groups
+
+```{r}
+ageGroupWatershed <- aspen$data$Observations %>%
+  left_join(select(aspen$data$AllSites, any_of(c("Site", "Stratum", "numberOfPlotsInWatershed"))), by = join_by(Site)) %>%
+  # filter for POTR5
+  filter(SpeciesCode == "POTR5") %>%
+  # Pivot longer to make calculations easier
+  pivot_longer(cols = Class1:Class6, names_to = "class", values_to = "treeCount") %>%
+  # summarize POTR5 for each age group divided by area converted to ha (area = (number of plots * pi * r^2)/10000 )
+  group_by(Stratum, class) %>%
+  summarise(density = sum(treeCount)/((first(numberOfPlotsInWatershed) * pi * 4^2)/10000),
+            sumLiveTrees = sum(treeCount),
+            numberOfPlotsInWatershed = first(numberOfPlotsInWatershed)) 
+
+
+ageGroupWatershedGraph <- ageGroupWatershed %>%
+  ggplot(aes(x= Stratum, y = density, fill = factor(class))) +
+  geom_col(position = position_dodge(preserve = "single"), stat = "identity") +
+  theme_minimal() +
+  labs(title = "Age Class Density",
+       x = "Watershed", y = "Density (#/ha)",
+       fill = "Age Class") + 
+  theme(plot.title = element_text(hjust = 0.5)) +
+  scale_fill_light()
+
+plotly::ggplotly(ageGroupWatershedGraph)
+```
+
+
+
+
+#### Frequency of Disturbances
+
+```{r}
+# Area of plots in which a disturbance occurs / total area of plots (for each watershed)
+
+disturbancesFrequency <- aspen$data$Disturbances %>%
+  group_by(Site) %>%
+  # Group data so every site with a disturbance has only one entry
+  summarise(Disturbance = first(Disturbance)) %>%
+  # Join disturbance and all site data
+  full_join(select(aspen$data$AllSites, any_of(c("Site", "Stratum", "numberOfPlotsInWatershed"))), by = join_by(Site)) %>%
+  group_by(Stratum) %>%
+  # Find out how many plots in a watershed were disturbed
+  summarise(plotsWithDisturbances = sum(!is.na(Disturbance)), 
+            numberOfPlotsInWatershed = first(numberOfPlotsInWatershed),
+            frequency = (first(plotsWithDisturbances) * pi * 4^2)/(first(numberOfPlotsInWatershed) * pi * 4^2)*100)
+
+disturbancesGraph <- disturbancesFrequency %>%
+  ggplot(aes(x = Stratum, y = frequency)) +
+  geom_col() +
+  theme_minimal() +
+  labs(title = "Disturbance Frequency",
+       x = "Watershed", y = "Frequency") + 
+  theme(plot.title = element_text(hjust = 0.5))
+
+plotly::ggplotly(disturbancesGraph)
+```
+
+
+#### Frequency of Pests
+
+```{r}
+# Area of plots in which a disturbance occurs / total area of plots (for each watershed)
+
+pestFrequency <- aspen$data$Pests %>%
+  group_by(Site) %>%
+  # Group data so every site with a disturbance has only one entry
+  summarise(Pest = first(Pest)) %>%
+  # Join disturbance and all site data
+  full_join(select(aspen$data$AllSites, any_of(c("Site", "Stratum", "numberOfPlotsInWatershed"))), by = join_by(Site)) %>%
+  group_by(Stratum) %>%
+  # Find out how many plots in a watershed were disturbed
+  summarise(plotsWithPests = sum(!is.na(Pest)), 
+            numberOfPlotsInWatershed = first(numberOfPlotsInWatershed),
+            frequency = (first(plotsWithPests) * pi * 4^2)/(first(numberOfPlotsInWatershed) * pi * 4^2)*100)
+
+pestGraph <- pestFrequency %>%
+  ggplot(aes(x = Stratum, y = frequency)) +
+  geom_col() +
+  theme_minimal() +
+  labs(title = "Pest Frequency",
+       x = "Watershed", y = "Frequency") + 
+  theme(plot.title = element_text(hjust = 0.5))
+
+plotly::ggplotly(pestGraph)
+```
+
+
+# Park
+
+#### Density of Live Aspens
+
+```{r}
+# 4 meter radius of plots
+# area = pi*r^2
+# denisty = #/ha
+
+
+  
+aspenParkDensity <- aspen$data$Observations %>%
+  # Find number of plot in each park
+  group_by(Park) %>%
+  mutate(numberOfPlots = n_distinct(Site)) %>%
+  ungroup() %>%
+  # filter for POTR5
+  filter(SpeciesCode == "POTR5") %>%
+  # Add up LIVE POTR5 for each row
+  mutate(totalLiveTrees = Class1+Class2+Class3+Class4+Class5) %>%
+  # summarize total POTR5 divided by area converted to ha (area = (number of plots * pi * r^2)/10000 )
+  group_by(Park) %>%
+  summarise(density = sum(totalLiveTrees)/((first(numberOfPlots) * pi * 4^2)/10000),
+            sumLiveTrees = sum(totalLiveTrees),
+            numberOfPlots = first(numberOfPlots))
+
+
+aspenParkDensityGraph <- aspenParkDensity %>%
+  ggplot(aes(x= Park, y = density)) +
+  geom_col() +
+  theme_minimal() +
+  labs(title = "Aspen Density",
+       x = "Park", y = "Density (#/ha)") + 
+  theme(plot.title = element_text(hjust = 0.5))
+
+plotly::ggplotly(aspenParkDensityGraph)
+```
+
+#### Density of Conifers
+
+```{r}
+# 4 meter radius of plots
+# area = pi*r^2
+# denisty = #/ha
+
+
+  
+coniferParkDensity <- aspen$data$Observations %>%
+  # Find number of plot in each park
+  group_by(Park) %>%
+  mutate(numberOfPlots = n_distinct(Site)) %>%
+  ungroup() %>%
+  # filter for POTR5
+  filter(SpeciesCode != "POTR5" & SpeciesCode != "BEUC2") %>%
+  # Add up LIVE POTR5 for each row
+  mutate(totalLiveTrees = Class1+Class2+Class3+Class4+Class5) %>%
+  # summarize total POTR5 divided by area converted to ha (area = (number of plots * pi * r^2)/10000 )
+  group_by(Park) %>%
+  summarise(density = sum(totalLiveTrees)/((first(numberOfPlots) * pi * 4^2)/10000),
+            sumLiveTrees = sum(totalLiveTrees),
+            numberOfPlots = first(numberOfPlots))
+
+
+coniferParkDensityGraph <- coniferParkDensity %>%
+  ggplot(aes(x= Park, y = density)) +
+  geom_col() +
+  theme_minimal() +
+  labs(title = "Conifer Density",
+       x = "Park", y = "Density (#/ha)") + 
+  theme(plot.title = element_text(hjust = 0.5))
+
+plotly::ggplotly(coniferParkDensityGraph)
+```
+
+
+#### Density of Aspen vs Conifers
+
+```{r}
+compareDensity <- aspen$data$Observations %>%
+    # Find number of plot in each park
+  group_by(Park) %>%
+  mutate(numberOfPlots = n_distinct(Site)) %>%
+  ungroup() %>%
+  # filter out one species that is not aspen or conifer
+  filter(SpeciesCode != "BEUC2") %>%
+  mutate(isAspen = case_when(
+    SpeciesCode == "POTR5" ~ 'Aspen',
+    .default = 'Conifer'
+  )) %>%
+  # Add up live trees for each row
+  mutate(liveTrees = Class1+Class2+Class3+Class4+Class5) %>%
+  # summarize total POTR5 divided by area converted to ha (area = (number of plots * pi * r^2)/10000 )
+  group_by(Park, isAspen) %>%
+  summarise(density = sum(liveTrees)/((first(numberOfPlots) * pi * 4^2)/10000),
+            totalLiveTrees = sum(liveTrees),
+            numberOfPlots = first(numberOfPlots))
+
+
+compareDensityGraph <- compareDensity %>%
+  ggplot(aes(x= Park, y = density, fill = as.factor(isAspen))) +
+  geom_col(position = position_dodge(preserve = "single"), stat = "identity") +
+  theme_minimal() +
+  labs(title = "Aspen and Conifer Density",
+       x = "Park", y = "Density (#/ha)",
+       fill = "Species") + 
+  theme(plot.title = element_text(hjust = 0.5)) +
+  scale_fill_light()
+
+plotly::ggplotly(compareDensityGraph)
+```
+
+#### Density of Aspen Age Groups
+
+```{r}
+ageGroupPark <- aspen$data$Observations %>%
+  # Find number of plot in each park
+  group_by(Park) %>%
+  mutate(numberOfPlots = n_distinct(Site)) %>%
+  ungroup() %>%
+  # filter for POTR5
+  filter(SpeciesCode == "POTR5") %>%
+  # Pivot longer to make calculations easier
+  pivot_longer(cols = Class1:Class6, names_to = "class", values_to = "treeCount") %>%
+  # summarize POTR5 for each age group divided by area converted to ha (area = (number of plots * pi * r^2)/10000 )
+  group_by(Park, class) %>%
+  summarise(density = sum(treeCount)/((first(numberOfPlots) * pi * 4^2)/10000),
+            sumLiveTrees = sum(treeCount),
+            numberOfPlots = first(numberOfPlots)) 
+
+
+ageGroupParkGraph <- ageGroupPark %>%
+  ggplot(aes(x= Park, y = density, fill = factor(class))) +
+  geom_col(position = position_dodge(preserve = "single"), stat = "identity") +
+  theme_minimal() +
+  labs(title = "Age Class Density",
+       x = "Park", y = "Density (#/ha)",
+       fill = "Age Class") + 
+  theme(plot.title = element_text(hjust = 0.5)) +
+  scale_fill_light()
+
+plotly::ggplotly(ageGroupParkGraph)
+```
+
+
+#### Frequency of Disturbances
+
+```{r}
+# Area of plots in which a disturbance occurs / total area of plots (for each park)
+
+disturbancesFrequency <- aspen$data$Disturbances %>%
+  group_by(Site) %>%
+  # Group data so every site with a disturbance has only one entry
+  summarise(Disturbance = first(Disturbance)) %>%
+  # Join disturbance and all site data
+  full_join(select(aspen$data$AllSites, any_of(c("Site", "Park"))), by = join_by(Site)) %>%
+  group_by(Park) %>%
+  # Find out how many plots in a watershed were disturbed
+  summarise(plotsWithDisturbances = sum(!is.na(Disturbance)), 
+            numberOfPlotsInPark = n(),
+            frequency = (first(plotsWithDisturbances) * pi * 4^2)/(first(numberOfPlotsInPark) * pi * 4^2)*100)
+
+disturbancesGraph <- disturbancesFrequency %>%
+  ggplot(aes(x = Park, y = frequency)) +
+  geom_col() +
+  theme_minimal() +
+  labs(title = "Disturbance Frequency",
+       x = "Park", y = "Frequency") + 
+  theme(plot.title = element_text(hjust = 0.5))
+
+plotly::ggplotly(disturbancesGraph)
+```
+
+#### Frequency of Pests
+
+```{r}
+# Area of plots in which a disturbance occurs / total area of plots (for each park)
+
+pestFrequency <- aspen$data$Pests %>%
+  group_by(Site) %>%
+  # Group data so every site with a disturbance has only one entry
+  summarise(Pest = first(Pest)) %>%
+  # Join disturbance and all site data
+  full_join(select(aspen$data$AllSites, any_of(c("Site", "Park"))), by = join_by(Site)) %>%
+  group_by(Park) %>%
+  # Find out how many plots in a watershed were disturbed
+  summarise(plotsWithPests = sum(!is.na(Pest)), 
+            numberOfPlotsInPark = n(),
+            frequency = (first(plotsWithPests) * pi * 4^2)/(first(numberOfPlotsInPark) * pi * 4^2)*100)
+
+pestGraph <- pestFrequency %>%
+  ggplot(aes(x = Park, y = frequency)) +
+  geom_col() +
+  theme_minimal() +
+  labs(title = "Pest Frequency",
+       x = "Park", y = "Frequency") + 
+  theme(plot.title = element_text(hjust = 0.5))
+
+plotly::ggplotly(pestGraph)
+```
+

From 9779db6c0bdb19e2d1db470fe789381b10e210c8 Mon Sep 17 00:00:00 2001
From: Irene Foster <ifoster@nps.gov>
Date: Tue, 13 Feb 2024 14:56:35 -0800
Subject: [PATCH 07/11] make second aspen data base optional when importing

---
 R/utils.R | 32 ++++++++++++++++++++++----------
 1 file changed, 22 insertions(+), 10 deletions(-)

diff --git a/R/utils.R b/R/utils.R
index 2684a2e..1034d7f 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -23,25 +23,37 @@ pkg_globals <- new.env(parent = emptyenv())
 fetchAndWrangleAspen <- function(aspen_url = "https://services1.arcgis.com/fBc8EJBxQRMcHlei/arcgis/rest/services/MOJN_Aspen_Test_Visit_NonSpatial_gdb/FeatureServer",
                                  site_url =  "https://services1.arcgis.com/fBc8EJBxQRMcHlei/arcgis/rest/services/AspenSites2/FeatureServer",
                                  agol_username = "mojn_data") {
+  flattened_data <- list(data = list(),
+                         metadata = list())
 
-  # Import aspen database and aspen site database
+  # Import aspen database
   raw_data <- fetchagol::fetchRawData(aspen_url, agol_username)
-  raw_site_data <- fetchagol::fetchRawData(site_url, agol_username)
 
-  # Add site data to list of other data
-  raw_data$data$AllSites <- raw_site_data$data$`MOJN Aspen Sites Master`
-  # Join site metadata to list of other metadata
-  raw_data$metadata$AllSites <- raw_site_data$metadata$`MOJN Aspen Sites Master`
+
+  # Imports optional second database and connects it to main database
+  # For MOJN - there's a master list of sites in a different database
+  if(!is.null(site_url)) {
+    # Import aspen site database
+    raw_site_data <- fetchagol::fetchRawData(site_url, agol_username)
+
+    # Add site data to list of other data
+    raw_data$data$AllSites <- raw_site_data$data$`MOJN Aspen Sites Master`
+    # Join site metadata to list of other metadata
+    raw_data$metadata$AllSites <- raw_site_data$metadata$`MOJN Aspen Sites Master`
+  }
 
   raw_data <- fetchagol::cleanData(raw_data)
 
-  flattened_data <- list(data = list(),
-                         metadata = list())
+  # Add optional second database to flattened data
+  if(!is.null(site_url)) {
+  flattened_data$data$AllSites <- raw_data$data$AllSites
+  }
 
 
   # Join background site information with other tables
-  flattened_data$data$AllSites <- raw_data$data$AllSites
-  flattened_data$data$SiteVisit <- raw_data$data$SiteVisit
+  flattened_data$data$SiteVisit <- raw_data$data$SiteVisit %>%
+    dplyr::left_join(dplyr::select(flattened_data$data$AllSites, dplyr::any_of(c("Site", "Status", "Panel", "Stratum", "Zone_", "Community"))),
+                     by = dplyr::join_by("Site"))
 
   flattened_data$data$Disturbances <- raw_data$data$SiteVisit %>%
     dplyr::mutate(parentglobalid = globalid) %>%

From 09915660a02a82c86e456c23c7086650b30894e1 Mon Sep 17 00:00:00 2001
From: Irene Foster <ifoster@nps.gov>
Date: Wed, 14 Feb 2024 09:00:58 -0800
Subject: [PATCH 08/11] adding beginning of unit tests

---
 DESCRIPTION                    |  3 ++
 R/aspen_qc.R                   | 18 +++++------
 man/checkAspenQC.Rd            | 11 +++++++
 man/fetchAndWrangleAspen.Rd    | 12 ++++---
 man/missingTreeQC.Rd           | 11 +++++++
 man/treeSpeciesQC.Rd           | 11 +++++++
 man/unknownSpeciesQC.Rd        | 11 +++++++
 man/writePine.Rd               | 29 +++++++++++++++++
 tests/testthat.R               | 12 +++++++
 tests/testthat/test-aspen_qc.R | 58 ++++++++++++++++++++++++++++++++++
 tests/testthat/test-utils.R    | 23 ++++++++++++++
 11 files changed, 185 insertions(+), 14 deletions(-)
 create mode 100644 man/checkAspenQC.Rd
 create mode 100644 man/missingTreeQC.Rd
 create mode 100644 man/treeSpeciesQC.Rd
 create mode 100644 man/unknownSpeciesQC.Rd
 create mode 100644 man/writePine.Rd
 create mode 100644 tests/testthat.R
 create mode 100644 tests/testthat/test-aspen_qc.R
 create mode 100644 tests/testthat/test-utils.R

diff --git a/DESCRIPTION b/DESCRIPTION
index aa29ceb..9311189 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -14,3 +14,6 @@ RoxygenNote: 7.3.1
 Imports: 
     dplyr,
     fetchagol
+Suggests: 
+    testthat (>= 3.0.0)
+Config/testthat/edition: 3
diff --git a/R/aspen_qc.R b/R/aspen_qc.R
index c7750ad..e06dd9c 100644
--- a/R/aspen_qc.R
+++ b/R/aspen_qc.R
@@ -1,4 +1,3 @@
-#' @importFrom magrittr %>% %<>%
 
 #' Check that there is at least one tree for every observation entry and flag entries with more than 250 trees
 missingTreeQC <- function(data = fetchAndWrangleAspen()){
@@ -7,7 +6,8 @@ missingTreeQC <- function(data = fetchAndWrangleAspen()){
     # Add up all trees in a row
     dplyr::mutate(totalTreeCount = Class1+Class2+Class3+Class4+Class5+Class6) %>%
     # Filter for any rows that have less than one trees or more than 250 trees
-    dplyr::filter(totalTreeCount < 1 | totalTreeCount > 250)
+    dplyr::filter(totalTreeCount < 1 | totalTreeCount > 250) %>%
+    dplyr::select(Park, Site, VisitDate, SpeciesCode, Class1, Class2, Class3, Class4, Class5, Class6, totalTreeCount)
 
   return(missingTreeQC)
 }
@@ -16,7 +16,8 @@ missingTreeQC <- function(data = fetchAndWrangleAspen()){
 treeSpeciesQC <- function(data = fetchAndWrangleAspen()){
 
   treeSpeciesQC <- data$data$Observations %>%
-    dplyr::filter(SpeciesCode == "" | is.na(SpeciesCode) | SpeciesCode == "NA")
+    dplyr::filter(SpeciesCode == "" | is.na(SpeciesCode) | SpeciesCode == "NA") %>%
+    dplyr::select(Park, Site, VisitDate, SpeciesCode, Class1, Class2, Class3, Class4, Class5, Class6)
 
   return(treeSpeciesQC)
 }
@@ -26,7 +27,8 @@ unknownSpeciesQC <- function(data = fetchAndWrangleAspen()){
 
   unknownSpeciesQC <- data$data$Observations %>%
     dplyr::filter(SpeciesCode == "UNK") %>%
-    dplyr::filter(Class1 != 0 | Class2 != 0 | Class3 != 0 | Class4 != 0 | Class5 != 0)
+    dplyr::filter(Class1 != 0 | Class2 != 0 | Class3 != 0 | Class4 != 0 | Class5 != 0) %>%
+    dplyr::select(Park, Site, VisitDate, SpeciesCode, Class1, Class2, Class3, Class4, Class5, Class6)
 
   return(unknownSpeciesQC)
 }
@@ -37,16 +39,14 @@ checkAspenQC <- function(data = fetchAndWrangleAspen()){
   checkAspenQC <- data$data$Observations %>%
     # Filter for aspen trees
     dplyr::filter(SpeciesCode == "POTR5") %>%
+    dplyr::select(parentglobalid, SpeciesCode, Class1, Class2, Class3, Class4, Class5, Class6) %>%
     # Join observations and visits
     dplyr::full_join(data$data$SiteVisit, by = c("parentglobalid" = "globalid")) %>%
     # Filter data for any visits that don't have aspen observations
-    dplyr::filter(is.na(SpeciesCode))
+    dplyr::filter(is.na(SpeciesCode)) %>%
+    dplyr::select(Park, Site, VisitType, VisitDate, SpeciesCode, Class1, Class2, Class3, Class4, Class5, Class6)
 
   return(checkAspenQC)
 }
 
 
-
-
-
-
diff --git a/man/checkAspenQC.Rd b/man/checkAspenQC.Rd
new file mode 100644
index 0000000..b63930c
--- /dev/null
+++ b/man/checkAspenQC.Rd
@@ -0,0 +1,11 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/aspen_qc.R
+\name{checkAspenQC}
+\alias{checkAspenQC}
+\title{Check that all of the plots have at least one aspen each time they are visited}
+\usage{
+checkAspenQC(data = fetchAndWrangleAspen())
+}
+\description{
+Check that all of the plots have at least one aspen each time they are visited
+}
diff --git a/man/fetchAndWrangleAspen.Rd b/man/fetchAndWrangleAspen.Rd
index b6f32a2..2199dd1 100644
--- a/man/fetchAndWrangleAspen.Rd
+++ b/man/fetchAndWrangleAspen.Rd
@@ -2,7 +2,7 @@
 % Please edit documentation in R/utils.R
 \name{fetchAndWrangleAspen}
 \alias{fetchAndWrangleAspen}
-\title{Fetch feature service info from AGOL}
+\title{Fetch aspen data from AGOL and do preliminary data wrangling}
 \usage{
 fetchAndWrangleAspen(
   aspen_url =
@@ -13,13 +13,15 @@ fetchAndWrangleAspen(
 )
 }
 \arguments{
-\item{url}{Feature service URL}
+\item{aspen_url}{URL to main AGOL aspen database}
 
-\item{token}{Authentication token (not needed for public layers)}
+\item{site_url}{URL to AGOL database for aspen sites (if applicable)}
+
+\item{agol_username}{Authentication token (not needed for public layers)}
 }
 \value{
-A list
+A list of data frames and metadata
 }
 \description{
-Retrieves metadata from AGOL layers and tables.
+Fetch aspen data from AGOL and do preliminary data wrangling
 }
diff --git a/man/missingTreeQC.Rd b/man/missingTreeQC.Rd
new file mode 100644
index 0000000..2c1e0f2
--- /dev/null
+++ b/man/missingTreeQC.Rd
@@ -0,0 +1,11 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/aspen_qc.R
+\name{missingTreeQC}
+\alias{missingTreeQC}
+\title{Check that there is at least one tree for every observation entry and flag entries with more than 250 trees}
+\usage{
+missingTreeQC(data = fetchAndWrangleAspen())
+}
+\description{
+Check that there is at least one tree for every observation entry and flag entries with more than 250 trees
+}
diff --git a/man/treeSpeciesQC.Rd b/man/treeSpeciesQC.Rd
new file mode 100644
index 0000000..e44912b
--- /dev/null
+++ b/man/treeSpeciesQC.Rd
@@ -0,0 +1,11 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/aspen_qc.R
+\name{treeSpeciesQC}
+\alias{treeSpeciesQC}
+\title{Check that no entries in the observation table have a SpeciesCode that is NA or missing}
+\usage{
+treeSpeciesQC(data = fetchAndWrangleAspen())
+}
+\description{
+Check that no entries in the observation table have a SpeciesCode that is NA or missing
+}
diff --git a/man/unknownSpeciesQC.Rd b/man/unknownSpeciesQC.Rd
new file mode 100644
index 0000000..cbb574d
--- /dev/null
+++ b/man/unknownSpeciesQC.Rd
@@ -0,0 +1,11 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/aspen_qc.R
+\name{unknownSpeciesQC}
+\alias{unknownSpeciesQC}
+\title{Check that unknown species entries do not have any live trees}
+\usage{
+unknownSpeciesQC(data = fetchAndWrangleAspen())
+}
+\description{
+Check that unknown species entries do not have any live trees
+}
diff --git a/man/writePine.Rd b/man/writePine.Rd
new file mode 100644
index 0000000..8c59de4
--- /dev/null
+++ b/man/writePine.Rd
@@ -0,0 +1,29 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/utils.R
+\name{writePine}
+\alias{writePine}
+\title{Write pine data to CSV}
+\usage{
+writePine(
+  data_dir = here::here("data", "final"),
+  dictionary_dir = here::here("data", "dictionary"),
+  dictionary_filenames = c(tables = "data_dictionary_tables.txt", attributes =
+    "data_dictionary_attributes.txt", categories = "data_dictionary_categories.txt"),
+  verbose = FALSE,
+  ...
+)
+}
+\arguments{
+\item{data_dir}{Folder to store data csv's in}
+
+\item{dictionary_dir}{Folder to store data dictionaries in}
+
+\item{dictionary_filenames}{Named list with names \code{c("tables", "attributes", "categories")} indicating what to name the tables, attributes, and categories data dictionaries. You are encouraged to keep the default names unless you have a good reason to change them.}
+
+\item{verbose}{Output feedback to console?}
+
+\item{...}{Arguments to pass to \code{\link[=filterPine]{filterPine()}}}
+}
+\description{
+Write pine data to CSV
+}
diff --git a/tests/testthat.R b/tests/testthat.R
new file mode 100644
index 0000000..e90bb1c
--- /dev/null
+++ b/tests/testthat.R
@@ -0,0 +1,12 @@
+# This file is part of the standard setup for testthat.
+# It is recommended that you do not modify it.
+#
+# Where should you do additional test configuration?
+# Learn more about the roles of various files in:
+# * https://r-pkgs.org/testing-design.html#sec-tests-files-overview
+# * https://testthat.r-lib.org/articles/special-files.html
+
+library(testthat)
+library(aspen)
+
+test_check("aspen")
diff --git a/tests/testthat/test-aspen_qc.R b/tests/testthat/test-aspen_qc.R
new file mode 100644
index 0000000..56246d8
--- /dev/null
+++ b/tests/testthat/test-aspen_qc.R
@@ -0,0 +1,58 @@
+
+# things to test in qc functions
+
+  # check that each data table has the correct number of columns and that the columns have the right names
+  # check that the return type from function is correct
+  # make fake data and then test number of rows?? - nah probs not
+
+test_that("Test that missingTreeQC() works", {
+  # Run function
+  returnData <- missingTreeQC()
+
+  # Compare expected and actual column names
+  actual_cols <- names(returnData)
+  expected_cols <- c("Park", "Site", "VisitDate", "SpeciesCode", "Class1", "Class2", "Class3", "Class4", "Class5", "Class6", "totalTreeCount")
+  expect_equal(actual_cols, expected_cols)
+
+  # Check that function returns a database
+  expect_s3_class(returnData, "data.frame")
+})
+
+test_that("Test that treeSpeciesQC() works", {
+  # Run function
+  returnData <- treeSpeciesQC()
+
+  # Compare expected and actual column names
+  actual_cols <- names(returnData)
+  expected_cols <- c("Park", "Site", "VisitDate", "SpeciesCode", "Class1", "Class2", "Class3", "Class4", "Class5", "Class6")
+  expect_equal(actual_cols, expected_cols)
+
+  # Check that function returns a database
+  expect_s3_class(returnData, "data.frame")
+})
+
+test_that("Test that unknownSpeciesQC() works", {
+  # Run function
+  returnData <- unknownSpeciesQC()
+
+  # Compare expected and actual column names
+  actual_cols <- names(returnData)
+  expected_cols <- c("Park", "Site", "VisitDate", "SpeciesCode", "Class1", "Class2", "Class3", "Class4", "Class5", "Class6")
+  expect_equal(actual_cols, expected_cols)
+
+  # Check that function returns a database
+  expect_s3_class(returnData, "data.frame")
+})
+
+test_that("Test that checkAspenQC() works", {
+  # Run function
+  returnData <- checkAspenQC()
+
+  # Compare expected and actual column names
+  actual_cols <- names(returnData)
+  expected_cols <- c("Park", "Site","VisitType", "VisitDate", "SpeciesCode", "Class1", "Class2", "Class3", "Class4", "Class5", "Class6")
+  expect_equal(actual_cols, expected_cols)
+
+  # Check that function returns a database
+  expect_s3_class(returnData, "data.frame")
+})
diff --git a/tests/testthat/test-utils.R b/tests/testthat/test-utils.R
new file mode 100644
index 0000000..71b25fb
--- /dev/null
+++ b/tests/testthat/test-utils.R
@@ -0,0 +1,23 @@
+
+
+# TODO things to test in fetchAndWrangleAspen - only function in utils rn
+  # check that $data and $meta data exists
+  # check that there are the correct number of tables in data and metadata
+  # check that each data table has the correct number of columns and that the columns have the right names
+  # check that the return type from function is correct
+
+test_that("Test that fetchAndWrangleAspen() works", {
+  # Compare expected and actual names in return object
+  actual_names <- names(fetchAndWrangleAspen())
+  expected_names <- c("data", "metadata")
+  expect_equal(actual_names, expected_names)
+
+  # Compare expected and actual names of the aspen data frames
+  actual_cols <- names(fetchAndWrangleAspen()$data)
+  expected_cols <- c("AllSites", "SiteVisit", "Disturbances", "Observations", "Pests")
+  expect_equal(actual_cols, expected_cols)
+
+  # Check that the first object in the data list is a database
+  returnType <- fetchAndWrangleAspen()$data[[1]]
+  expect_s3_class(returnType, "data.frame")
+})

From ac28f8f42d65f6d66874b5f8bb433ede0a55169d Mon Sep 17 00:00:00 2001
From: Irene Foster <ifoster@nps.gov>
Date: Mon, 6 May 2024 12:09:30 -0700
Subject: [PATCH 09/11] adding UCBN data to package and functions for writing
 data

---
 DESCRIPTION                                   |   3 +-
 NAMESPACE                                     |   4 +-
 R/aspen_qc.R                                  |  34 +-
 R/aspen_qc_ucbn.R                             |  80 ++
 R/utils.R                                     | 171 ++--
 man/checkAspenQC.Rd                           |   7 +-
 man/checkAspenUCBNQC.Rd                       |  14 +
 ...leAspen.Rd => fetchAndWrangleMOJNAspen.Rd} |   6 +-
 man/fetchAndWrangleUCBNAspen.Rd               |  26 +
 man/missingTreeQC.Rd                          |   3 +
 man/missingTreeUCBNQC.Rd                      |  14 +
 man/treeChangeQC.Rd                           |  14 +
 man/treeChangeUCBNQC.Rd                       |  14 +
 man/treeSpeciesQC.Rd                          |   3 +
 man/treeSpeciesUCBNQC.Rd                      |  14 +
 man/unknownSpeciesQC.Rd                       |   3 +
 man/unknownSpeciesUCBNQC.Rd                   |  14 +
 man/{writePine.Rd => writeAspen.Rd}           |  20 +-
 .../visualizations/aspen_visualizations.qmd   | 593 +++++++++---
 .../aspen_visualizations_ucbn.qmd             | 845 ++++++++++++++++++
 scripts/visualizations/qcResults.qmd          |  78 ++
 tests/testthat/test-aspen_qc.R                |  13 +-
 tests/testthat/test-utils.R                   |   8 +-
 23 files changed, 1771 insertions(+), 210 deletions(-)
 create mode 100644 R/aspen_qc_ucbn.R
 create mode 100644 man/checkAspenUCBNQC.Rd
 rename man/{fetchAndWrangleAspen.Rd => fetchAndWrangleMOJNAspen.Rd} (89%)
 create mode 100644 man/fetchAndWrangleUCBNAspen.Rd
 create mode 100644 man/missingTreeUCBNQC.Rd
 create mode 100644 man/treeChangeQC.Rd
 create mode 100644 man/treeChangeUCBNQC.Rd
 create mode 100644 man/treeSpeciesUCBNQC.Rd
 create mode 100644 man/unknownSpeciesUCBNQC.Rd
 rename man/{writePine.Rd => writeAspen.Rd} (65%)
 create mode 100644 scripts/visualizations/aspen_visualizations_ucbn.qmd
 create mode 100644 scripts/visualizations/qcResults.qmd

diff --git a/DESCRIPTION b/DESCRIPTION
index 9311189..3383d18 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -13,7 +13,8 @@ Roxygen: list(markdown = TRUE)
 RoxygenNote: 7.3.1
 Imports: 
     dplyr,
-    fetchagol
+    fetchagol,
+    magrittr
 Suggests: 
     testthat (>= 3.0.0)
 Config/testthat/edition: 3
diff --git a/NAMESPACE b/NAMESPACE
index 6e231fd..7c1f1ae 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -1,6 +1,8 @@
 # Generated by roxygen2: do not edit by hand
 
-export(fetchAndWrangleAspen)
+export(fetchAndWrangleMOJNAspen)
+export(fetchAndWrangleUCBNAspen)
+export(writeAspen)
 import(dplyr)
 import(fetchagol)
 importFrom(magrittr,"%<>%")
diff --git a/R/aspen_qc.R b/R/aspen_qc.R
index e06dd9c..e00443f 100644
--- a/R/aspen_qc.R
+++ b/R/aspen_qc.R
@@ -1,5 +1,6 @@
 
 #' Check that there is at least one tree for every observation entry and flag entries with more than 250 trees
+#' @param data Aspen dataset to run through QC functions
 missingTreeQC <- function(data = fetchAndWrangleAspen()){
 
   missingTreeQC <- data$data$Observations %>%
@@ -13,6 +14,7 @@ missingTreeQC <- function(data = fetchAndWrangleAspen()){
 }
 
 #' Check that no entries in the observation table have a SpeciesCode that is NA or missing
+#' @param data Aspen dataset to run through QC functions
 treeSpeciesQC <- function(data = fetchAndWrangleAspen()){
 
   treeSpeciesQC <- data$data$Observations %>%
@@ -23,6 +25,7 @@ treeSpeciesQC <- function(data = fetchAndWrangleAspen()){
 }
 
 #' Check that unknown species entries do not have any live trees
+#' @param data Aspen dataset to run through QC functions
 unknownSpeciesQC <- function(data = fetchAndWrangleAspen()){
 
   unknownSpeciesQC <- data$data$Observations %>%
@@ -33,20 +36,45 @@ unknownSpeciesQC <- function(data = fetchAndWrangleAspen()){
   return(unknownSpeciesQC)
 }
 
-#' Check that all of the plots have at least one aspen each time they are visited
+#' Check that all plots have at least one live aspen each time they are visited
+#' @param data Aspen dataset to run through QC functions
 checkAspenQC <- function(data = fetchAndWrangleAspen()){
 
   checkAspenQC <- data$data$Observations %>%
     # Filter for aspen trees
     dplyr::filter(SpeciesCode == "POTR5") %>%
-    dplyr::select(parentglobalid, SpeciesCode, Class1, Class2, Class3, Class4, Class5, Class6) %>%
+    dplyr::select(parentglobalid, SpeciesCode, Class1, Class2, Class3, Class4, Class5) %>%
     # Join observations and visits
     dplyr::full_join(data$data$SiteVisit, by = c("parentglobalid" = "globalid")) %>%
     # Filter data for any visits that don't have aspen observations
     dplyr::filter(is.na(SpeciesCode)) %>%
-    dplyr::select(Park, Site, VisitType, VisitDate, SpeciesCode, Class1, Class2, Class3, Class4, Class5, Class6)
+    dplyr::select(Park, Site, VisitType, VisitDate, SpeciesCode)
 
   return(checkAspenQC)
 }
 
+#' Flag large changes in number of tree between years
+#' @param data Aspen dataset to run through QC functions
+treeChangeQC <- function(data = fetchAndWrangleAspen()){
 
+  visits <- data$data$SiteVisit %>%
+    dplyr::group_by(Site) %>%
+    dplyr::arrange(Site, VisitDate) %>%
+    dplyr::mutate(visitNumber = seq_along(Site)) %>%
+    dplyr::select("globalid", "Park", "Site", "VisitType", "VisitDate", "visitNumber")
+
+  treeChangeQC <- data$data$Observations %>%
+    # join with site visit table
+    #dplyr::left_join(visits, by = c("parentglobalid" = "globalid")) %>%
+    dplyr::mutate(totalTrees = Class1+Class2+Class3+Class4+Class5) %>%
+    dplyr::group_by(Site, VisitDate) %>%
+    dplyr::summarise(totalTrees = sum(totalTrees)) %>%
+    dplyr::group_by(Site) %>%
+    dplyr::mutate(previousTrees = dplyr::lag(totalTrees),
+                  treeChange = previousTrees - totalTrees)
+
+    # find percent change (in total number of trees both conifer and aspen or separate??) between year ((year 1 - year 2)/year 1)
+    # flag any with a large change (figure out what counts as a large change)
+}
+
+# Flag if there's more than one visit to a plot in a year
diff --git a/R/aspen_qc_ucbn.R b/R/aspen_qc_ucbn.R
new file mode 100644
index 0000000..f8f8466
--- /dev/null
+++ b/R/aspen_qc_ucbn.R
@@ -0,0 +1,80 @@
+
+#' Check that there is at least one tree for every observation entry and flag entries with more than 250 trees
+#' @param data Aspen dataset to run through QC functions
+missingTreeUCBNQC <- function(data){
+
+  missingTreeQC <- data$data$Observations %>%
+    # Add up all trees in a row
+    dplyr::mutate(totalTreeCount = Class1+Class2+Class3+Class4+Class5+Class6) %>%
+    # Filter for any rows that have less than one trees or more than 250 trees
+    dplyr::filter(totalTreeCount < 1 | totalTreeCount > 250) %>%
+    dplyr::select(Park, Unique_ID, VisitDate, SpeciesCode, Class1, Class2, Class3, Class4, Class5, Class6, totalTreeCount)
+
+  return(missingTreeQC)
+}
+
+#' Check that no entries in the observation table have a SpeciesCode that is NA or missing
+#' @param data Aspen dataset to run through QC functions
+treeSpeciesUCBNQC <- function(data){
+
+  treeSpeciesQC <- data$data$Observations %>%
+    dplyr::filter(SpeciesCode == "" | is.na(SpeciesCode) | SpeciesCode == "NA") %>%
+    dplyr::select(Park, Unique_ID, VisitDate, SpeciesCode, Class1, Class2, Class3, Class4, Class5, Class6)
+
+  return(treeSpeciesQC)
+}
+
+#' Check that unknown species entries do not have any live trees
+#' @param data Aspen dataset to run through QC functions
+unknownSpeciesUCBNQC <- function(data){
+
+  unknownSpeciesQC <- data$data$Observations %>%
+    dplyr::filter(SpeciesCode == "UNK") %>%
+    dplyr::filter(Class1 != 0 | Class2 != 0 | Class3 != 0 | Class4 != 0 | Class5 != 0) %>%
+    dplyr::select(Park, Unique_ID, VisitDate, SpeciesCode, Class1, Class2, Class3, Class4, Class5, Class6)
+
+  return(unknownSpeciesQC)
+}
+
+#' Check that all plots have at least one live aspen for each visit
+#' @param data Aspen dataset to run through QC functions
+checkAspenUCBNQC <- function(data){
+
+  checkAspenQC <- data$data$Observations %>%
+    # Filter for aspen trees
+    dplyr::filter(SpeciesCode == "POPTRE") %>%
+    dplyr::select(parentglobalid, SpeciesCode, Class1, Class2, Class3, Class4, Class5) %>%
+    # Join observations and visits
+    dplyr::full_join(data$data$SiteVisit, by = c("parentglobalid" = "globalid")) %>%
+    # Filter data for any visits that don't have aspen observations
+    dplyr::filter(is.na(SpeciesCode)) %>% # Any visit without aspens should have a NA in SpeciesCode after joining with only aspen observations
+    dplyr::select(Park, Unique_ID, VisitDate, SpeciesCode)
+
+  return(checkAspenQC)
+}
+
+#' Flag large changes in number of tree between years
+#' @param data Aspen dataset to run through QC functions
+treeChangeUCBNQC <- function(data){
+
+  visits <- data$data$SiteVisit %>%
+    dplyr::group_by(Unique_ID) %>%
+    dplyr::arrange(Unique_ID, VisitDate) %>%
+    dplyr::mutate(visitNumber = seq_along(Unique_ID)) %>%
+    dplyr::select("globalid", "Park", "Unique_ID", "VisitDate", "visitNumber")
+
+  treeChangeQC <- data$data$Observations %>%
+    # join with site visit table
+    #dplyr::left_join(visits, by = c("parentglobalid" = "globalid")) %>%
+    dplyr::mutate(totalTrees = Class1+Class2+Class3+Class4+Class5) %>%
+    dplyr::group_by(Unique_ID, VisitDate) %>%
+    dplyr::summarise(totalTrees = sum(totalTrees)) %>%
+    dplyr::group_by(Unique_ID) %>%
+    dplyr::mutate(previousTrees = dplyr::lag(totalTrees),
+                  treeChange = previousTrees - totalTrees)
+
+    # find percent change (in total number of trees both conifer and aspen or separate??) between year ((year 1 - year 2)/year 1)
+    # flag any with a large change (figure out what counts as a large change)
+}
+
+# Flag if there's more than one visit to a plot in a year
diff --git a/R/utils.R b/R/utils.R
index 1034d7f..1766684 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -14,13 +14,10 @@ pkg_globals <- new.env(parent = emptyenv())
 #' @return A list of data frames and metadata
 #' @export
 
-
-# TODO make sure this works for everybody
-# TODO make mojn master site data optional
+# TODO: add variables that were joined to data tables to each metadata table
 # TODO should data be loaded from inside the function like now or input as a variable
-# TODO agol_password should maybe be added as a variable???
-# TODO can we remove creation, creationDate, editDate, and editor from all the tables??
-fetchAndWrangleAspen <- function(aspen_url = "https://services1.arcgis.com/fBc8EJBxQRMcHlei/arcgis/rest/services/MOJN_Aspen_Test_Visit_NonSpatial_gdb/FeatureServer",
+fetchAndWrangleMOJNAspen <- function(
+    aspen_url = "https://services1.arcgis.com/fBc8EJBxQRMcHlei/arcgis/rest/services/MOJN_Aspen_Test_Visit_NonSpatial_gdb/FeatureServer",
                                  site_url =  "https://services1.arcgis.com/fBc8EJBxQRMcHlei/arcgis/rest/services/AspenSites2/FeatureServer",
                                  agol_username = "mojn_data") {
   flattened_data <- list(data = list(),
@@ -44,78 +41,150 @@ fetchAndWrangleAspen <- function(aspen_url = "https://services1.arcgis.com/fBc8E
 
   raw_data <- fetchagol::cleanData(raw_data)
 
+
+  # TODO Figure out what to do with metadata, in iu its left empty/fix metadata
+  flattened_data$metadata <- raw_data$metadata
+
   # Add optional second database to flattened data
   if(!is.null(site_url)) {
   flattened_data$data$AllSites <- raw_data$data$AllSites
-  }
-
 
   # Join background site information with other tables
-  flattened_data$data$SiteVisit <- raw_data$data$SiteVisit %>%
+  raw_data$data$SiteVisit <- raw_data$data$SiteVisit %>%
     dplyr::left_join(dplyr::select(flattened_data$data$AllSites, dplyr::any_of(c("Site", "Status", "Panel", "Stratum", "Zone_", "Community"))),
                      by = dplyr::join_by("Site"))
+  flattened_data$metadata$SiteVisit$fields <- append(flattened_data$metadata$SiteVisit$fields, flattened_data$metadata$AllSites$fields[c("Status", "Panel", "Stratum", "Zone_", "Community")])
+  flattened_data$metadata$AllSites$table_name <- "AllSites"
+
+  }
+
+
+  # Join background site information with other tables
+  # flattened_data$data$SiteVisit <- raw_data$data$SiteVisit %>%
+  #   dplyr::left_join(dplyr::select(flattened_data$data$AllSites, dplyr::any_of(c("Site", "Status", "Panel", "Stratum", "Zone_", "Community"))),
+  #                    by = dplyr::join_by("Site"))
+  flattened_data$data$SiteVisit <- raw_data$data$SiteVisit
 
   flattened_data$data$Disturbances <- raw_data$data$SiteVisit %>%
     dplyr::mutate(parentglobalid = globalid) %>%
     dplyr::select(parentglobalid, Park, Site, VisitType, VisitDate, FieldSeason) %>%
     dplyr::right_join(raw_data$data$Disturbances, by = c("parentglobalid" = "parentglobalid"))
+  flattened_data$metadata$Disturbances$fields <- append(flattened_data$metadata$Disturbances$fields, flattened_data$metadata$SiteVisit$fields[c("Park", "VisitType", "VisitDate", "FieldSeason")])
+
 
   flattened_data$data$Observations <- raw_data$data$SiteVisit %>%
     dplyr::mutate(parentglobalid = globalid) %>%
     dplyr::select(parentglobalid, Park, Site, VisitType, VisitDate, FieldSeason) %>%
     dplyr::right_join(raw_data$data$Observations, by = c("parentglobalid" = "parentglobalid"))
+  flattened_data$metadata$Observations$fields <- append(flattened_data$metadata$Observations$fields, flattened_data$metadata$SiteVisit$fields[c("Park", "Site", "VisitType", "VisitDate", "FieldSeason")])
 
   flattened_data$data$Pests <- flattened_data$data$Observations %>%
     dplyr::mutate(parentglobalid = globalid) %>%
     dplyr::select(parentglobalid, Park, Site, VisitType, VisitDate, FieldSeason, SpeciesCode) %>%
     dplyr::right_join(raw_data$data$Pests, by = c("parentglobalid" = "parentglobalid"))
+  flattened_data$metadata$Pests$fields <- append(flattened_data$metadata$Pests$fields, flattened_data$metadata$Observations$fields[c("Park", "VisitType", "VisitDate", "FieldSeason", "SpeciesCode")])
 
-  # TODO Figure out what to do with metadata, in iu its left empty
-  flattened_data$metadata <- raw_data$metadata
+  invisible(flattened_data)
+}
 
+#' Fetch aspen data from AGOL and do preliminary data wrangling
+#'
+#' @param aspen_url URL to main AGOL aspen database
+#' @param site_url URL to AGOL database for aspen sites (if applicable)
+#' @param agol_username Authentication token (not needed for public layers)
+#'
+#' @return A list of data frames and metadata
+#' @export
 
-  return(flattened_data)
-}
 
-formatMetadataAsEML <- function(meta, token) {
-  attrs <- sapply(names(meta), function(attr_name) {
-    meta_list <- meta[[attr_name]]
-    if (meta_list$attributes$class == "date") {
-      format_string <- "YYYY-MM-DD"
-    } else if (meta_list$attributes$class == "dateTime") {
-      format_string <- "YYYY-MM-DDThh:mm:ss"
-    } else {
-      format_string <- NA
-    }
-    attributes_df <- tibble::tibble(attributeName = attr_name,
-                                    attributeDefinition = meta_list$description,
-                                    unit = c(meta_list$attributes$unit, NA)[1],
-                                    class = meta_list$attributes$class,
-                                    dateTimeFormatString = format_string,
-                                    missingValueCode = c(meta_list$attributes$missing_value_code, NA)[1],
-                                    missingValueCodeExplanation = c(meta_list$attributes$missing_value_code_exp, NA)[1])
-
-    return(attrs)
-  }, simplify = TRUE)
-
-  catvars <- sapply(names(meta), function(attr_name) {
-    if (length(meta_list$lookup$lookup_url) > 0) {
-      catvars_df <- meta_list$lookup$lookup_df %>%
-        dplyr::mutate(attributeName = attr_name) %>%
-        dplyr::select(attributeName, code = name, definition = description)
-    } else {
-      catvars_df <- NULL
-    }
-    return(catvars_df)
-  }, simplify = TRUE)
+# TODO fix this
+fetchAndWrangleUCBNAspen <- function(
+    aspen_url = "https://services1.arcgis.com/fBc8EJBxQRMcHlei/arcgis/rest/services/service_4d6343e9204142928351c52c6f1362c5/FeatureServer",
+                                 site_url =  NULL,
+                                 agol_username = "mojn_data") {
+  flattened_data <- list(data = list(),
+                         metadata = list())
+
+  # Import aspen database
+  raw_data <- fetchagol::fetchRawData(aspen_url, agol_username)
+
 
+  # Imports optional second database and connects it to main database
+  # For MOJN - there's a master list of sites in a different database
+  if(!is.null(site_url)) {
+    # Import aspen site database
+    raw_site_data <- fetchagol::fetchRawData(site_url, agol_username)
+
+    # Add site data to list of other data
+    raw_data$data$AllSites <- raw_site_data$data$`MOJN Aspen Sites Master`
+    # Join site metadata to list of other metadata
+    raw_data$metadata$AllSites <- raw_site_data$metadata$`MOJN Aspen Sites Master`
+  }
+
+  raw_data <- fetchagol::cleanData(raw_data)
+
+  # Add optional second database to flattened data
+  if(!is.null(site_url)) {
+    flattened_data$data$AllSites <- raw_data$data$AllSites
+
+    # TODO Figure out what to do with metadata, in iu its left empty/fix metadata
+    flattened_data$metadata <- raw_data$metadata
+
+    # Join background site information with other tables and add background information metadata to tables it was added to
+    raw_data$data$SiteVisit <- raw_data$data$SiteVisit %>%
+      dplyr::left_join(dplyr::select(flattened_data$data$AllSites, dplyr::any_of(c("Site", "Status", "Panel", "Stratum", "Zone_", "Community"))),
+                       by = dplyr::join_by("Site"))}
+  flattened_data$metadata$SiteVisit$fields <- append(flattened_data$metadata$SiteVisit$fields, flattened_data$metadata$AllSites$fields[c("Site", "Status", "Panel", "Stratum", "Zone_", "Community")])
+
+
+  # Join background site information with other tables
+  # flattened_data$data$SiteVisit <- raw_data$data$SiteVisit %>%
+  #   dplyr::left_join(dplyr::select(flattened_data$data$AllSites, dplyr::any_of(c("Site", "Status", "Panel", "Stratum", "Zone_", "Community"))),
+  #                    by = dplyr::join_by("Site"))
+  flattened_data$data$SiteVisit <- raw_data$data$SiteVisit
+
+  flattened_data$data$Disturbances <- raw_data$data$SiteVisit %>%
+    dplyr::mutate(parentglobalid = globalid) %>%
+    dplyr::select(parentglobalid, Park, Stand, Transect, VisitDate, PlotNum, Unique_ID) %>%
+    dplyr::right_join(raw_data$data$Disturbances, by = c("parentglobalid" = "parentglobalid"))
+  flattened_data$metadata$Disturbances$fields <- append(flattened_data$metadata$Disturbances$fields, flattened_data$metadata$SiteVisit$fields[c("Park", "Stand", "Transect", "VisitDate", "PlotNum", "Unique_ID")])
+
+  flattened_data$data$Observations <- raw_data$data$SiteVisit %>%
+    dplyr::mutate(parentglobalid = globalid) %>%
+    dplyr::select(parentglobalid, Park, Stand, Transect, VisitDate, PlotNum, Unique_ID) %>%
+    dplyr::right_join(raw_data$data$Observations, by = c("parentglobalid" = "parentglobalid"))
+  flattened_data$metadata$Observations$fields <- append(flattened_data$metadata$Observations$fields, flattened_data$metadata$SiteVisit$fields[c("Park", "Stand", "Transect", "VisitDate", "PlotNum", "Unique_ID")])
+
+  flattened_data$data$Pests <- flattened_data$data$Observations %>%
+    dplyr::mutate(parentglobalid = globalid) %>%
+    dplyr::select(parentglobalid, Park, Stand, Transect, VisitDate, PlotNum, Unique_ID) %>%
+    dplyr::right_join(raw_data$data$Pests, by = c("parentglobalid" = "parentglobalid"))
+  flattened_data$metadata$Pests$fields <- append(flattened_data$metadata$Pests$fields, flattened_data$metadata$SiteVisit$fields[c("Park", "Stand", "Transect", "VisitDate", "PlotNum", "Unique_ID")])
+
+  invisible(flattened_data)
 }
 
+#' Write aspen data to CSV
+#'
+#' @inheritParams fetchagol::writeToFiles
+#'
+#' @export
+#'
+writeAspen <- function(all_data, data_dir = here::here("data", "final"), dictionary_dir = here::here("data", "dictionary"),
+                      dictionary_filenames = c(tables = "data_dictionary_tables.txt",
+                                               attributes = "data_dictionary_attributes.txt",
+                                               categories = "data_dictionary_categories.txt"),
+                      verbose = FALSE, removeColumns = TRUE, cols_to_remove = c("Editor", "Creator"))
+{
+  fetchagol::writeToFiles(all_data = all_data, data_dir = data_dir, dictionary_dir = dictionary_dir, lookup_dir = NA, verbose = verbose, removeColumns = TRUE, cols_to_remove = c("Editor", "Creator"))
+}
+
+
+
+# TODO: should a get_data() function be added like in pine??
+
+
+
+
+
 
-# fetchHostedCSV <- function(item_id, token, root = "nps.maps.arcgis.com") {
-#   url <- paste0("https://", root, "/sharing/rest/content/items/", item_id, "/data")
-#   resp <- httr::GET(url, query = list(token = token$token))
-#   content <- httr::content(resp, type = "text/csv", encoding = "UTF-8")
-#
-#   return(content)
-# }
diff --git a/man/checkAspenQC.Rd b/man/checkAspenQC.Rd
index b63930c..2e5d423 100644
--- a/man/checkAspenQC.Rd
+++ b/man/checkAspenQC.Rd
@@ -2,10 +2,13 @@
 % Please edit documentation in R/aspen_qc.R
 \name{checkAspenQC}
 \alias{checkAspenQC}
-\title{Check that all of the plots have at least one aspen each time they are visited}
+\title{Check that all plots have at least one live aspen each time they are visited}
 \usage{
 checkAspenQC(data = fetchAndWrangleAspen())
 }
+\arguments{
+\item{data}{Aspen dataset to run through QC functions}
+}
 \description{
-Check that all of the plots have at least one aspen each time they are visited
+Check that all plots have at least one live aspen each time they are visited
 }
diff --git a/man/checkAspenUCBNQC.Rd b/man/checkAspenUCBNQC.Rd
new file mode 100644
index 0000000..6e133bf
--- /dev/null
+++ b/man/checkAspenUCBNQC.Rd
@@ -0,0 +1,14 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/aspen_qc_ucbn.R
+\name{checkAspenUCBNQC}
+\alias{checkAspenUCBNQC}
+\title{Check that all plots have at least one live aspen for each visit}
+\usage{
+checkAspenUCBNQC(data)
+}
+\arguments{
+\item{data}{Aspen dataset to run through QC functions}
+}
+\description{
+Check that all plots have at least one live aspen for each visit
+}
diff --git a/man/fetchAndWrangleAspen.Rd b/man/fetchAndWrangleMOJNAspen.Rd
similarity index 89%
rename from man/fetchAndWrangleAspen.Rd
rename to man/fetchAndWrangleMOJNAspen.Rd
index 2199dd1..ffaa9b1 100644
--- a/man/fetchAndWrangleAspen.Rd
+++ b/man/fetchAndWrangleMOJNAspen.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/utils.R
-\name{fetchAndWrangleAspen}
-\alias{fetchAndWrangleAspen}
+\name{fetchAndWrangleMOJNAspen}
+\alias{fetchAndWrangleMOJNAspen}
 \title{Fetch aspen data from AGOL and do preliminary data wrangling}
 \usage{
-fetchAndWrangleAspen(
+fetchAndWrangleMOJNAspen(
   aspen_url =
     "https://services1.arcgis.com/fBc8EJBxQRMcHlei/arcgis/rest/services/MOJN_Aspen_Test_Visit_NonSpatial_gdb/FeatureServer",
   site_url =
diff --git a/man/fetchAndWrangleUCBNAspen.Rd b/man/fetchAndWrangleUCBNAspen.Rd
new file mode 100644
index 0000000..5fdc77a
--- /dev/null
+++ b/man/fetchAndWrangleUCBNAspen.Rd
@@ -0,0 +1,26 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/utils.R
+\name{fetchAndWrangleUCBNAspen}
+\alias{fetchAndWrangleUCBNAspen}
+\title{Fetch aspen data from AGOL and do preliminary data wrangling}
+\usage{
+fetchAndWrangleUCBNAspen(
+  aspen_url =
+    "https://services1.arcgis.com/fBc8EJBxQRMcHlei/arcgis/rest/services/service_4d6343e9204142928351c52c6f1362c5/FeatureServer",
+  site_url = NULL,
+  agol_username = "mojn_data"
+)
+}
+\arguments{
+\item{aspen_url}{URL to main AGOL aspen database}
+
+\item{site_url}{URL to AGOL database for aspen sites (if applicable)}
+
+\item{agol_username}{Authentication token (not needed for public layers)}
+}
+\value{
+A list of data frames and metadata
+}
+\description{
+Fetch aspen data from AGOL and do preliminary data wrangling
+}
diff --git a/man/missingTreeQC.Rd b/man/missingTreeQC.Rd
index 2c1e0f2..5916796 100644
--- a/man/missingTreeQC.Rd
+++ b/man/missingTreeQC.Rd
@@ -6,6 +6,9 @@
 \usage{
 missingTreeQC(data = fetchAndWrangleAspen())
 }
+\arguments{
+\item{data}{Aspen dataset to run through QC functions}
+}
 \description{
 Check that there is at least one tree for every observation entry and flag entries with more than 250 trees
 }
diff --git a/man/missingTreeUCBNQC.Rd b/man/missingTreeUCBNQC.Rd
new file mode 100644
index 0000000..cee2dbd
--- /dev/null
+++ b/man/missingTreeUCBNQC.Rd
@@ -0,0 +1,14 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/aspen_qc_ucbn.R
+\name{missingTreeUCBNQC}
+\alias{missingTreeUCBNQC}
+\title{Check that there is at least one tree for every observation entry and flag entries with more than 250 trees}
+\usage{
+missingTreeUCBNQC(data)
+}
+\arguments{
+\item{data}{Aspen dataset to run through QC functions}
+}
+\description{
+Check that there is at least one tree for every observation entry and flag entries with more than 250 trees
+}
diff --git a/man/treeChangeQC.Rd b/man/treeChangeQC.Rd
new file mode 100644
index 0000000..31a386d
--- /dev/null
+++ b/man/treeChangeQC.Rd
@@ -0,0 +1,14 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/aspen_qc.R
+\name{treeChangeQC}
+\alias{treeChangeQC}
+\title{Flag large changes in number of tree between years}
+\usage{
+treeChangeQC(data = fetchAndWrangleAspen())
+}
+\arguments{
+\item{data}{Aspen dataset to run through QC functions}
+}
+\description{
+Flag large changes in number of tree between years
+}
diff --git a/man/treeChangeUCBNQC.Rd b/man/treeChangeUCBNQC.Rd
new file mode 100644
index 0000000..4d2854d
--- /dev/null
+++ b/man/treeChangeUCBNQC.Rd
@@ -0,0 +1,14 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/aspen_qc_ucbn.R
+\name{treeChangeUCBNQC}
+\alias{treeChangeUCBNQC}
+\title{Flag large changes in number of tree between years}
+\usage{
+treeChangeUCBNQC(data)
+}
+\arguments{
+\item{data}{Aspen dataset to run through QC functions}
+}
+\description{
+Flag large changes in number of tree between years
+}
diff --git a/man/treeSpeciesQC.Rd b/man/treeSpeciesQC.Rd
index e44912b..aaca94f 100644
--- a/man/treeSpeciesQC.Rd
+++ b/man/treeSpeciesQC.Rd
@@ -6,6 +6,9 @@
 \usage{
 treeSpeciesQC(data = fetchAndWrangleAspen())
 }
+\arguments{
+\item{data}{Aspen dataset to run through QC functions}
+}
 \description{
 Check that no entries in the observation table have a SpeciesCode that is NA or missing
 }
diff --git a/man/treeSpeciesUCBNQC.Rd b/man/treeSpeciesUCBNQC.Rd
new file mode 100644
index 0000000..b3b2bbe
--- /dev/null
+++ b/man/treeSpeciesUCBNQC.Rd
@@ -0,0 +1,14 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/aspen_qc_ucbn.R
+\name{treeSpeciesUCBNQC}
+\alias{treeSpeciesUCBNQC}
+\title{Check that no entries in the observation table have a SpeciesCode that is NA or missing}
+\usage{
+treeSpeciesUCBNQC(data)
+}
+\arguments{
+\item{data}{Aspen dataset to run through QC functions}
+}
+\description{
+Check that no entries in the observation table have a SpeciesCode that is NA or missing
+}
diff --git a/man/unknownSpeciesQC.Rd b/man/unknownSpeciesQC.Rd
index cbb574d..5f7bc1c 100644
--- a/man/unknownSpeciesQC.Rd
+++ b/man/unknownSpeciesQC.Rd
@@ -6,6 +6,9 @@
 \usage{
 unknownSpeciesQC(data = fetchAndWrangleAspen())
 }
+\arguments{
+\item{data}{Aspen dataset to run through QC functions}
+}
 \description{
 Check that unknown species entries do not have any live trees
 }
diff --git a/man/unknownSpeciesUCBNQC.Rd b/man/unknownSpeciesUCBNQC.Rd
new file mode 100644
index 0000000..e04c4ff
--- /dev/null
+++ b/man/unknownSpeciesUCBNQC.Rd
@@ -0,0 +1,14 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/aspen_qc_ucbn.R
+\name{unknownSpeciesUCBNQC}
+\alias{unknownSpeciesUCBNQC}
+\title{Check that unknown species entries do not have any live trees}
+\usage{
+unknownSpeciesUCBNQC(data)
+}
+\arguments{
+\item{data}{Aspen dataset to run through QC functions}
+}
+\description{
+Check that unknown species entries do not have any live trees
+}
diff --git a/man/writePine.Rd b/man/writeAspen.Rd
similarity index 65%
rename from man/writePine.Rd
rename to man/writeAspen.Rd
index 8c59de4..d61bb3a 100644
--- a/man/writePine.Rd
+++ b/man/writeAspen.Rd
@@ -1,19 +1,23 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/utils.R
-\name{writePine}
-\alias{writePine}
-\title{Write pine data to CSV}
+\name{writeAspen}
+\alias{writeAspen}
+\title{Write aspen data to CSV}
 \usage{
-writePine(
+writeAspen(
+  all_data,
   data_dir = here::here("data", "final"),
   dictionary_dir = here::here("data", "dictionary"),
   dictionary_filenames = c(tables = "data_dictionary_tables.txt", attributes =
     "data_dictionary_attributes.txt", categories = "data_dictionary_categories.txt"),
   verbose = FALSE,
-  ...
+  removeColumns = TRUE,
+  cols_to_remove = c("Editor", "Creator")
 )
 }
 \arguments{
+\item{all_data}{Output of \code{fetchRawData()}}
+
 \item{data_dir}{Folder to store data csv's in}
 
 \item{dictionary_dir}{Folder to store data dictionaries in}
@@ -22,8 +26,10 @@ writePine(
 
 \item{verbose}{Output feedback to console?}
 
-\item{...}{Arguments to pass to \code{\link[=filterPine]{filterPine()}}}
+\item{removeColumns}{Should columns be removed?}
+
+\item{cols_to_remove}{Columns that should be removed, \code{c("Editor", "Creator")} is the default because they can contain personally identifiable information}
 }
 \description{
-Write pine data to CSV
+Write aspen data to CSV
 }
diff --git a/scripts/visualizations/aspen_visualizations.qmd b/scripts/visualizations/aspen_visualizations.qmd
index 4338e6e..f1670d9 100644
--- a/scripts/visualizations/aspen_visualizations.qmd
+++ b/scripts/visualizations/aspen_visualizations.qmd
@@ -9,9 +9,11 @@ execute:
   warning: false
 ---
 
-```{r, message = FALSE}
+```{r, message=FALSE}
 library(tidyverse)
 library(khroma)
+library(viridis)
+library(plotly)
 aspen <- aspen::fetchAndWrangleAspen()
 ```
 
@@ -35,31 +37,37 @@ aspen$data$AllSites <- aspen$data$AllSites %>%
 
 # Vegetation Community
 
-#### Density of Live Aspens
+#### Average Density of Live Aspens
 
 ```{r}
 # 4 meter radius of plots
 # area = pi*r^2
 # density = #/ha
 
-aspenDensity <- aspen$data$Observations %>%
+# this is finding the density for each plot and then averaging it for the communities
+
+aspenDensity2 <- aspen$data$Observations %>%
   left_join(select(aspen$data$AllSites, any_of(c("Site", "Community", "numberOfPlotsInCommunity"))), by = join_by(Site)) %>%
   # filter for POTR5
-  filter(SpeciesCode == "POTR5") %>%
+  filter(SpeciesCode == "POTR5" & VisitType == "Primary") %>%
   # Add up LIVE POTR5 for each row
-  mutate(totalLiveTrees = Class1+Class2+Class3+Class4+Class5) %>%
+  mutate(density = (Class1+Class2+Class3+Class4+Class5)/(pi * 4^2/10000),
+         totalLiveTrees = Class1+Class2+Class3+Class4+Class5) %>%
   # summarize total POTR5 divided by area converted to ha (area = (number of plots * pi * r^2)/10000 )
   group_by(Community) %>%
-  summarise(density = sum(totalLiveTrees)/((first(numberOfPlotsInCommunity) * pi * 4^2)/10000),
-            sumLiveTrees = sum(totalLiveTrees),
-            numberOfPlotsInCommunity = first(numberOfPlotsInCommunity))
-
-
-aspenDensityGraph <- aspenDensity %>%
-  ggplot(aes(x= Community, y = density)) +
+  summarise(avgDensity = mean(density),
+            sd = sd(density), 
+            numberOfPlots = n()) %>%
+  # Find values for error bars
+  mutate(ymin = avgDensity - sd,
+         ymax = avgDensity + sd)
+
+aspenDensityGraph <- aspenDensity2 %>%
+  ggplot(aes(x= Community, y = avgDensity, label = numberOfPlots)) +
   geom_col() +
+  geom_errorbar(aes(ymin = ymin, ymax = ymax), width = 0.2) +
   theme_minimal() +
-  labs(title = "Aspen Density",
+  labs(title = "Average Aspen Density",
        x = "Community", y = "Density (#/ha)") + 
   theme(plot.title = element_text(hjust = 0.5)) +
   theme(axis.text.x = element_text(size = 6, angle = 315))
@@ -67,134 +75,309 @@ aspenDensityGraph <- aspenDensity %>%
 plotly::ggplotly(aspenDensityGraph)
 ```
 
-#### Density of Live Aspens per Plot
 
 ```{r}
-# 4 meter radius of plots
-# area = pi*r^2
-# density = #/ha
+# # 4 meter radius of plots
+# # area = pi*r^2
+# # density = #/ha
+# 
+# 
+# # this is the density of the total community (add up total number of trees in community and divide by total area of plots)
+# aspenDensity <- aspen$data$Observations %>%
+#   left_join(select(aspen$data$AllSites, any_of(c("Site", "Community", "numberOfPlotsInCommunity"))), by = join_by(Site)) %>%
+#   filter(VisitType == "Primary") %>%
+#   # filter for POTR5
+#   filter(SpeciesCode == "POTR5") %>%
+#   # Add up LIVE POTR5 for each row
+#   mutate(totalLiveTrees = Class1+Class2+Class3+Class4+Class5) %>%
+#   # summarize total POTR5 divided by area converted to ha (area = (number of plots * pi * r^2)/10000 )
+#   group_by(Community) %>%
+#   summarise(density = sum(totalLiveTrees)/((first(numberOfPlotsInCommunity) * pi * 4^2)/10000),
+#             sumLiveTrees = sum(totalLiveTrees),
+#             numberOfPlotsInCommunity = first(numberOfPlotsInCommunity))
+# 
+# 
+# aspenDensityGraph <- aspenDensity %>%
+#   ggplot(aes(x= Community, y = density)) +
+#   geom_col() +
+#   theme_minimal() +
+#   labs(title = "Aspen Density",
+#        x = "Community", y = "Density (#/ha)") + 
+#   theme(plot.title = element_text(hjust = 0.5)) +
+#   theme(axis.text.x = element_text(size = 6, angle = 315))
+# 
+# plotly::ggplotly(aspenDensityGraph)
+```
+
+#### Density of Live Aspens per Plot
 
+```{r}
 aspenDensity <- aspen$data$Observations %>%
   left_join(select(aspen$data$AllSites, any_of(c("Site", "Community", "numberOfPlotsInCommunity"))), by = join_by(Site)) %>%
   # filter for POTR5
-  filter(SpeciesCode == "POTR5") %>%
+  filter(SpeciesCode == "POTR5" & VisitType == "Primary") %>%
   # Find density for each plot
-  mutate(density = (Class1+Class2+Class3+Class4+Class5)/((pi * 4^2)/10000))
+  mutate(density = (Class1+Class2+Class3+Class4+Class5)/((pi * 4^2)/10000)) %>%
+# Filter out communities with not enough points to graph
+group_by(Community) %>%
+  mutate(numberOfPlots = n()) %>%
+  ungroup() %>%
+  filter(numberOfPlots >2)
 
 
 aspenDensityGraph <- aspenDensity %>%
-  ggplot(aes(x= Community, y = density)) +
-  geom_violin() +
-  #geom_col() +
+  ggplot(aes(x= Community, y = density, fill = Community, label = numberOfPlots)) +
+    geom_violin() +
+    geom_boxplot(width=0.1, color="grey", alpha=0.5) +
+  stat_summary(fun = "mean", colour = "red", size = 2, geom = "point") +
+  scale_fill_viridis(discrete = TRUE) +
   theme_minimal() +
   labs(title = "Aspen Density",
        x = "Community", y = "Density (#/ha)") + 
   theme(plot.title = element_text(hjust = 0.5)) +
-  theme(axis.text.x = element_text(size = 6, angle = 315, hjust = 0.0))
+  theme(axis.text.x = element_text(size = 6, angle = 315, hjust = 0.0)) + 
+  guides(fill="none")
 
 aspenDensityGraph
 ```
 
-#### Density of Conifers
 
 ```{r}
-coniferDensity <- aspen$data$Observations %>%
+aspenDensityGraph <- aspenDensity %>%
+  ggplot(aes(x= Community, y = density, label = numberOfPlots)) +
+    #geom_violin() +
+  geom_boxplot(width=0.3, outlier.shape = NA) +
+  geom_point(alpha = 0.5, position=position_jitter(height=.5, width=.25)) +
+  theme_minimal() +
+  labs(title = "Aspen Density",
+       x = "Community", y = "Density (#/ha)") + 
+  theme(plot.title = element_text(hjust = 0.5)) +
+  theme(axis.text.x = element_text(size = 6, angle = 315, hjust = 0.0)) + 
+  guides(fill="none")
+
+aspenDensityGraph
+```
+
+
+```{r}
+fig <- aspenDensity %>%
+  plotly::plot_ly(
+    x = ~Community,
+    y = ~density,
+    split = ~Community,
+    color = ~Community,
+    colors = "Dark2",
+    type = 'violin',
+    box = list(visible = T),
+    meanline = list(visible = T)) 
+
+fig <- fig %>%
+  layout(
+    xaxis = list(
+      title = "Community"),
+    yaxis = list(
+      title = "Density",
+      zeroline = F,
+      range = list(0, 40000)),
+    showlegend = FALSE)
+
+fig
+```
+
+
+
+#### Average Density of Conifers
+
+```{r}
+# 4 meter radius of plots
+# area = pi*r^2
+# density = #/ha
+
+# this is finding the density for each plot and then averaging it for the communities
+
+
+# TODO: make it so the different plots in the community are aggregated before density is calculated
+aspenDensity2 <- aspen$data$Observations %>%
   left_join(select(aspen$data$AllSites, any_of(c("Site", "Community", "numberOfPlotsInCommunity"))), by = join_by(Site)) %>%
-  # Find Number of plot in each watershed
-  # group_by(Community) %>%
-  # mutate(numberOfPlots = n_distinct(Site)) %>%
-  # ungroup() %>%
   # filter for POTR5
-  filter(SpeciesCode != "POTR5" & SpeciesCode != "BEUC2") %>%
-  # Add up LIVE POTR5 for each row
-  mutate(totalLiveTrees = Class1+Class2+Class3+Class4+Class5) %>%
+  filter(SpeciesCode != "POTR5" & SpeciesCode != "BEUC2" & VisitType == "Primary") %>%
+  # Aggregate to get the total number of conifers at each site
+  group_by(Site, Community) %>%
+  summarise(Class1 = sum(Class1),
+            Class2 = sum(Class2),
+            Class3 = sum(Class3),
+            Class4 = sum(Class4),
+            Class5 = sum(Class5)) %>%
+  ungroup() %>%
+    # Add up LIVE POTR5 for each row
+  mutate(density = (Class1+Class2+Class3+Class4+Class5)/(pi * 4^2/10000),
+         totalLiveTrees = Class1+Class2+Class3+Class4+Class5) %>%
   # summarize total POTR5 divided by area converted to ha (area = (number of plots * pi * r^2)/10000 )
   group_by(Community) %>%
-  summarise(density = sum(totalLiveTrees)/((first(numberOfPlotsInCommunity) * pi * 4^2)/10000),
-            sumLiveTrees = sum(totalLiveTrees),
-            numberOfPlotsInCommunity = first(numberOfPlotsInCommunity))
-
-
-coniferDensityGraph <- coniferDensity %>%
-  ggplot(aes(x= Community, y = density)) +
+  summarise(avgDensity = mean(density),
+            sd = sd(density), 
+            numberOfPlots = n()) %>%
+  # Find values for error bars
+  mutate(ymin = avgDensity - sd,
+         ymax = avgDensity + sd)
+
+aspenDensityGraph <- aspenDensity2 %>%
+  ggplot(aes(x= Community, y = avgDensity, label = numberOfPlots)) +
   geom_col() +
+  geom_errorbar(aes(ymin = ymin, ymax = ymax), width = 0.2) +
   theme_minimal() +
-  labs(title = "Conifer Density",
+  labs(title = "Average Conifer Density",
        x = "Community", y = "Density (#/ha)") + 
   theme(plot.title = element_text(hjust = 0.5)) +
   theme(axis.text.x = element_text(size = 6, angle = 315))
 
-plotly::ggplotly(coniferDensityGraph)
+plotly::ggplotly(aspenDensityGraph)
 ```
 
 
-#### Density of Conifers per Plot
-
 ```{r}
-# 4 meter radius of plots
-# area = pi*r^2
-# density = #/ha
+# coniferDensity <- aspen$data$Observations %>%
+#   left_join(select(aspen$data$AllSites, any_of(c("Site", "Community", "numberOfPlotsInCommunity"))), by = join_by(Site)) %>%
+#   # filter for POTR5
+#   filter(SpeciesCode != "POTR5" & SpeciesCode != "BEUC2") %>%
+#   # Add up LIVE POTR5 for each row
+#   mutate(totalLiveTrees = Class1+Class2+Class3+Class4+Class5) %>%
+#   # summarize total POTR5 divided by area converted to ha (area = (number of plots * pi * r^2)/10000 )
+#   group_by(Community) %>%
+#   summarise(density = sum(totalLiveTrees)/((first(numberOfPlotsInCommunity) * pi * 4^2)/10000),
+#             sumLiveTrees = sum(totalLiveTrees),
+#             numberOfPlotsInCommunity = first(numberOfPlotsInCommunity))
+# 
+# 
+# coniferDensityGraph <- coniferDensity %>%
+#   ggplot(aes(x= Community, y = density)) +
+#   geom_col() +
+#   theme_minimal() +
+#   labs(title = "Conifer Density",
+#        x = "Community", y = "Density (#/ha)") + 
+#   theme(plot.title = element_text(hjust = 0.5)) +
+#   theme(axis.text.x = element_text(size = 6, angle = 315))
+# 
+# plotly::ggplotly(coniferDensityGraph)
+```
 
-# TODO: should i aggregate all the different types of conifers per plot before graphing?? - probably yea i think so
 
+#### Density of Conifers per Plot
+
+```{r}
 coniferDensity <- aspen$data$Observations %>%
   left_join(select(aspen$data$AllSites, any_of(c("Site", "Community", "numberOfPlotsInCommunity"))), by = join_by(Site)) %>%
   # filter for POTR5
-  filter(SpeciesCode != "POTR5" & SpeciesCode != "BEUC2") %>%
-  # Find total live trees in each row
-  mutate(treeSum = Class1+Class2+Class3+Class4+Class5) %>%
+  filter(SpeciesCode != "POTR5" & SpeciesCode != "BEUC2" & VisitType == "Primary") %>%
+    # Aggregate to get the total number of conifers at each site
   group_by(Site, Community) %>%
-  summarise(totalTrees = sum(treeSum)) %>%
+  summarise(Class1 = sum(Class1),
+            Class2 = sum(Class2),
+            Class3 = sum(Class3),
+            Class4 = sum(Class4),
+            Class5 = sum(Class5)) %>%
+  ungroup() %>%
   # Find density for each plot
-  mutate(density = (totalTrees)/((pi * 4^2)/10000))
+  mutate(density = (Class1+Class2+Class3+Class4+Class5)/((pi * 4^2)/10000)) %>%
+# Filter out communities with not enough points to graph
+group_by(Community) %>%
+  mutate(numberOfPlots = n()) %>%
+  ungroup() %>%
+  filter(numberOfPlots >2)
 
 
 coniferDensityGraph <- coniferDensity %>%
-  ggplot(aes(x= Community, y = density, fill = Community)) +
-  geom_violin() +
-  #geom_col() +
+  ggplot(aes(x= Community, y = density, fill = Community, label = numberOfPlots)) +
+    geom_violin() +
+    geom_boxplot(width=0.1, color="grey", alpha=0.2) +
+  stat_summary(fun = "mean", colour = "red", size = 2, geom = "point") +
+  scale_fill_viridis(discrete = TRUE) +
   theme_minimal() +
   labs(title = "Conifer Density",
        x = "Community", y = "Density (#/ha)") + 
   theme(plot.title = element_text(hjust = 0.5)) +
-  theme(axis.text.x = element_text(size = 6, angle = 315, hjust = 0)) +
-  scale_fill_light() + 
+  theme(axis.text.x = element_text(size = 6, angle = 315, hjust = 0.0)) + 
   guides(fill="none")
 
 coniferDensityGraph
 ```
 
+
+```{r}
+# # 4 meter radius of plots
+# # area = pi*r^2
+# # density = #/ha
+# 
+# # TODO: should i aggregate all the different types of conifers per plot before graphing?? - probably yea i think so
+# 
+# coniferDensity <- aspen$data$Observations %>%
+#   left_join(select(aspen$data$AllSites, any_of(c("Site", "Community", "numberOfPlotsInCommunity"))), by = join_by(Site)) %>%
+#   # filter for POTR5
+#   filter(SpeciesCode != "POTR5" & SpeciesCode != "BEUC2") %>%
+#   # Find total live trees in each row
+#   mutate(treeSum = Class1+Class2+Class3+Class4+Class5) %>%
+#   group_by(Site, Community) %>%
+#   summarise(totalTrees = sum(treeSum)) %>%
+#   # Find density for each plot
+#   mutate(density = (totalTrees)/((pi * 4^2)/10000))
+# 
+# 
+# coniferDensityGraph <- coniferDensity %>%
+#   ggplot(aes(x= Community, y = density, fill = Community)) +
+#   geom_violin() +
+#   #geom_col() +
+#   theme_minimal() +
+#   labs(title = "Conifer Density",
+#        x = "Community", y = "Density (#/ha)") + 
+#   theme(plot.title = element_text(hjust = 0.5)) +
+#   theme(axis.text.x = element_text(size = 6, angle = 315, hjust = 0)) +
+#   scale_fill_light() + 
+#   guides(fill="none")
+# 
+# coniferDensityGraph
+```
+
 #### Density of Aspen vs Conifers
 
 ```{r}
 compareDensity <- aspen$data$Observations %>%
   left_join(select(aspen$data$AllSites, any_of(c("Site", "Community", "numberOfPlotsInCommunity"))), by = join_by(Site)) %>%
     # filter out one species that is not aspen or conifer
-  filter(SpeciesCode != "BEUC2") %>%
+  filter(SpeciesCode != "BEUC2" & VisitType == "Primary") %>%
   mutate(isAspen = case_when(
     SpeciesCode == "POTR5" ~ 'Aspen',
     .default = 'Conifer'
   )) %>%
+    # Aggregate to get the total number of conifers at each site
+  group_by(Site, Community, isAspen) %>%
+  summarise(Class1 = sum(Class1),
+            Class2 = sum(Class2),
+            Class3 = sum(Class3),
+            Class4 = sum(Class4),
+            Class5 = sum(Class5)) %>%
+  ungroup() %>%
   # Add up live trees for each row
-  mutate(liveTrees = Class1+Class2+Class3+Class4+Class5) %>%
-  # summarize total POTR5 divided by area converted to ha (area = (number of plots * pi * r^2)/10000 )
+  mutate(density = (Class1+Class2+Class3+Class4+Class5)/(pi * 4^2/10000)) %>%
   group_by(Community, isAspen) %>%
-  summarise(density = sum(liveTrees)/((first(numberOfPlotsInCommunity) * pi * 4^2)/10000),
-            totalLiveTrees = sum(liveTrees),
-            numberOfPlotsInCommunity = first(numberOfPlotsInCommunity))
+  summarise(avgDensity = mean(density),
+            sd = sd(density), 
+            numberOfPlots = n()) %>%
+  # Find values for error bars
+  mutate(ymin = avgDensity - sd,
+         ymax = avgDensity + sd)
 
 
 compareDensityGraph <- compareDensity %>%
-  ggplot(aes(x= Community, y = density, fill = as.factor(isAspen))) +
+  ggplot(aes(x= Community, y = avgDensity, fill = as.factor(isAspen), label = numberOfPlots)) +
   geom_col(position = position_dodge(preserve = "single"), stat = "identity") +
+  geom_errorbar(aes(ymin = ymin, ymax = ymax), width = 0.2, position = position_dodge(0.9)) +
   theme_minimal() +
   labs(title = "Aspen and Conifer Density",
        x = "Community", y = "Density (#/ha)",
        fill = "Species") + 
   theme(plot.title = element_text(hjust = 0.5)) +
   theme(axis.text.x = element_text(size = 6, angle = 315))  +
-  scale_fill_light()  +
-  theme(axis.text.x = element_text(size = 6, angle = 315))
+  scale_fill_light()
 
 plotly::ggplotly(compareDensityGraph)
 ```
@@ -206,21 +389,22 @@ plotly::ggplotly(compareDensityGraph)
 compareDensity <- aspen$data$Observations %>%
   left_join(select(aspen$data$AllSites, any_of(c("Site", "Community", "numberOfPlotsInCommunity"))), by = join_by(Site)) %>%
     # filter out one species that is not aspen or conifer
-  filter(SpeciesCode != "BEUC2") %>%
+  filter(SpeciesCode != "BEUC2" & VisitType == "Primary") %>%
   mutate(isAspen = case_when(
     SpeciesCode == "POTR5" ~ 'Aspen',
     .default = 'Conifer'
-  )) %>%
+  )) %>% 
+      # Aggregate to get the total number of conifers at each site
+  group_by(Site, Community, isAspen) %>%
+  summarise(Class1 = sum(Class1),
+            Class2 = sum(Class2),
+            Class3 = sum(Class3),
+            Class4 = sum(Class4),
+            Class5 = sum(Class5)) %>%
+  ungroup() %>%
     # Find density for each plot
-  mutate(density = (Class1+Class2+Class3+Class4+Class5)/((pi * 4^2)/10000))
-  # # Add up live trees for each row
-  # mutate(liveTrees = Class1+Class2+Class3+Class4+Class5) %>%
-  # # summarize total POTR5 divided by area converted to ha (area = (number of plots * pi * r^2)/10000 )
-  # group_by(Community, isAspen) %>%
-  # summarise(density = sum(liveTrees)/((first(numberOfPlotsInCommunity) * pi * 4^2)/10000),
-  #           totalLiveTrees = sum(liveTrees),
-  #           numberOfPlotsInCommunity = first(numberOfPlotsInCommunity))
-
+  mutate(density = (Class1+Class2+Class3+Class4+Class5)/((pi * 4^2)/10000)) %>%
+  filter(Community != "Curl-leaf Mountain-mahogany Shrubland & Woodland Complex")
 
 compareDensityGraph <- compareDensity %>%
   ggplot(aes(x= Community, y = density, fill = as.factor(isAspen))) +
@@ -244,19 +428,23 @@ compareDensityGraph
 ageGroupCommunity <- aspen$data$Observations %>%
   left_join(select(aspen$data$AllSites, any_of(c("Site", "Community", "numberOfPlotsInCommunity"))), by = join_by(Site)) %>%
   # filter for POTR5
-  filter(SpeciesCode == "POTR5") %>%
+  filter(SpeciesCode == "POTR5" & VisitType == "Primary") %>%
   # Pivot longer to make calculations easier
   pivot_longer(cols = Class1:Class6, names_to = "class", values_to = "treeCount") %>%
+  mutate(density = treeCount/(pi * 4^2/10000)) %>%
   # summarize POTR5 for each age group divided by area converted to ha (area = (number of plots * pi * r^2)/10000 )
   group_by(Community, class) %>%
-  summarise(density = sum(treeCount)/((first(numberOfPlotsInCommunity) * pi * 4^2)/10000),
-            sumLiveTrees = sum(treeCount),
-            numberOfPlotsInCommunity = first(numberOfPlotsInCommunity)) 
-
+    summarise(avgDensity = mean(density),
+            sd = sd(density), 
+            numberOfPlots = n()) %>%
+  # Find values for error bars
+  mutate(ymin = avgDensity - sd,
+         ymax = avgDensity + sd)
 
 ageGroupCommunityGraph <- ageGroupCommunity %>%
-  ggplot(aes(x= Community, y = density, fill = factor(class))) +
+  ggplot(aes(x= Community, y = avgDensity, fill = factor(class))) +
   geom_col(position = position_dodge(preserve = "single"), stat = "identity") +
+  geom_errorbar(aes(ymin = ymin, ymax = ymax), width = 0.3, position = position_dodge(0.9)) +
   theme_minimal() +
   labs(title = "Age Class Density",
        x = "Community", y = "Density (#/ha)",
@@ -275,98 +463,236 @@ plotly::ggplotly(ageGroupCommunityGraph)
 ageGroupCommunity <- aspen$data$Observations %>%
   left_join(select(aspen$data$AllSites, any_of(c("Site", "Community", "numberOfPlotsInCommunity"))), by = join_by(Site)) %>%
   # filter for POTR5
-  filter(SpeciesCode == "POTR5") %>%
+  filter(SpeciesCode == "POTR5" & VisitType == "Primary") %>%
   # Pivot longer to make calculations easier
   pivot_longer(cols = Class1:Class6, names_to = "class", values_to = "treeCount") %>%
-  mutate(density = treeCount/(pi * 4^2/10000))
-  # # summarize POTR5 for each age group divided by area converted to ha (area = (number of plots * pi * r^2)/10000 )
-  # group_by(Community, class) %>%
-  # summarise(density = sum(treeCount)/((first(numberOfPlotsInCommunity) * pi * 4^2)/10000),
-  #           sumLiveTrees = sum(treeCount),
-  #           numberOfPlotsInCommunity = first(numberOfPlotsInCommunity)) 
+  mutate(density = treeCount/(pi * 4^2/10000)) %>%
+  filter(Community != "Curl-leaf Mountain-mahogany Shrubland & Woodland Complex")
 
 
 ageGroupCommunityGraph <- ageGroupCommunity %>%
   ggplot(aes(x= Community, y = density, fill = factor(class))) +
   geom_boxplot() +
-  #geom_col(position = position_dodge(preserve = "single"), stat = "identity") +
   theme_minimal() +
   labs(title = "Age Class Density",
        x = "Community", y = "Density (#/ha)",
        fill = "Age Class") + 
   theme(plot.title = element_text(hjust = 0.5)) +
   scale_fill_light()  +
-  theme(axis.text.x = element_text(size = 6, angle = 315))
+  theme(axis.text.x = element_text(size = 6, angle = 315, hjust = 0))
 
-#plotly::ggplotly(ageGroupCommunityGraph)
 ageGroupCommunityGraph
 ```
 
-#### Density of Aspen Age Groups Line Graph
+
+
+```{r}
+# #### Density of Aspen Age Groups Line Graph
+# 
+# ageGroupCommunity <- aspen$data$Observations %>%
+#   left_join(select(aspen$data$AllSites, any_of(c("Site", "Community", "numberOfPlotsInCommunity"))), by = join_by(Site)) %>%
+#   # filter for POTR5
+#   filter(SpeciesCode == "POTR5") %>%
+#   # Pivot longer to make calculations easier
+#   pivot_longer(cols = Class1:Class6, names_to = "class", values_to = "treeCount") %>%
+#   # summarize POTR5 for each age group divided by area converted to ha (area = (number of plots * pi * r^2)/10000 )
+#   group_by(Community, class) %>%
+#   summarise(density = sum(treeCount)/((first(numberOfPlotsInCommunity) * pi * 4^2)/10000),
+#             sumLiveTrees = sum(treeCount),
+#             numberOfPlotsInCommunity = first(numberOfPlotsInCommunity)) 
+# 
+# 
+# ageGroupCommunityGraph <- ageGroupCommunity %>%
+#   ggplot(aes(x= class, y = density, color = factor(Community))) +
+#   geom_line(aes(group = factor(Community))) + 
+#   #geom_col(position = position_dodge(preserve = "single"), stat = "identity") +
+#   theme_minimal() +
+#   labs(title = "Age Class Density",
+#        x = "Age Class", y = "Density (#/ha)",
+#        color = "Community") + 
+#   theme(plot.title = element_text(hjust = 0.5)) +
+#   scale_fill_light() + 
+#   guides(color="none")
+# # +
+# #   theme(axis.text.x = element_text(size = 6, angle = 315))
+# 
+# #ageGroupCommunityGraph
+# plotly::ggplotly(ageGroupCommunityGraph)
+```
+
+
+#### Density of Conifer Age Groups
 
 ```{r}
 ageGroupCommunity <- aspen$data$Observations %>%
   left_join(select(aspen$data$AllSites, any_of(c("Site", "Community", "numberOfPlotsInCommunity"))), by = join_by(Site)) %>%
   # filter for POTR5
-  filter(SpeciesCode == "POTR5") %>%
+  filter(SpeciesCode != "POTR5" & SpeciesCode != "BEUC2" & VisitType == "Primary") %>%
   # Pivot longer to make calculations easier
   pivot_longer(cols = Class1:Class6, names_to = "class", values_to = "treeCount") %>%
-  # summarize POTR5 for each age group divided by area converted to ha (area = (number of plots * pi * r^2)/10000 )
-  group_by(Community, class) %>%
-  summarise(density = sum(treeCount)/((first(numberOfPlotsInCommunity) * pi * 4^2)/10000),
-            sumLiveTrees = sum(treeCount),
-            numberOfPlotsInCommunity = first(numberOfPlotsInCommunity)) 
-
+  mutate(density = treeCount/(pi * 4^2/10000)) 
+# %>%
+#   # summarize POTR5 for each age group divided by area converted to ha (area = (number of plots * pi * r^2)/10000 )
+#   group_by(Community, class) %>%
+#     summarise(avgDensity = mean(density),
+#             sd = sd(density), 
+#             numberOfPlots = n()) %>%
+#   # Find values for error bars
+#   mutate(ymin = avgDensity - sd,
+#          ymax = avgDensity + sd)
 
 ageGroupCommunityGraph <- ageGroupCommunity %>%
-  ggplot(aes(x= class, y = density, color = factor(Community))) +
-  geom_line(aes(group = factor(Community))) + 
-  #geom_col(position = position_dodge(preserve = "single"), stat = "identity") +
+  ggplot(aes(x= Community, y = density, fill = factor(class))) +
+  geom_boxplot() +
+  # geom_col(position = position_dodge(preserve = "single"), stat = "identity") +
+  # geom_errorbar(aes(ymin = ymin, ymax = ymax), width = 0.5, position = position_dodge(0.9)) +
   theme_minimal() +
   labs(title = "Age Class Density",
-       x = "Age Class", y = "Density (#/ha)",
-       color = "Community") + 
+       x = "Community", y = "Density (#/ha)",
+       fill = "Age Class") + 
   theme(plot.title = element_text(hjust = 0.5)) +
-  scale_fill_light() + 
-  guides(color="none")
-# +
-#   theme(axis.text.x = element_text(size = 6, angle = 315))
+  scale_fill_light()  +
+  theme(axis.text.x = element_text(size = 6, angle = 315))
 
-#ageGroupCommunityGraph
-plotly::ggplotly(ageGroupCommunityGraph)
+ageGroupCommunityGraph
 ```
 
-#### Frequency of Disturbances
+
+#### Density of Aspen vs Conifer Age Groups
 
 ```{r}
-# Area of plots in which a disturbance occurs / total area of plots (for each community)
+compareAgeDensity <- aspen$data$Observations %>%
+  left_join(select(aspen$data$AllSites, any_of(c("Site", "Community", "numberOfPlotsInCommunity"))), by = join_by(Site)) %>%
+    # filter out one species that is not aspen or conifer
+  filter(SpeciesCode != "BEUC2" & VisitType == "Primary") %>%
+  mutate(isAspen = case_when(
+    SpeciesCode == "POTR5" ~ 'Aspen',
+    .default = 'Conifer'
+  )) %>%
+    # Find density for each plot
+  mutate(density = (Class1+Class2+Class3+Class4+Class5)/((pi * 4^2)/10000)) %>%  
+# Pivot longer to make calculations easier
+  pivot_longer(cols = Class1:Class6, names_to = "class", values_to = "treeCount") %>%
+  mutate(density = treeCount/(pi * 4^2/10000))
+```
 
-disturbancesFrequency <- aspen$data$Disturbances %>%
-  group_by(Site) %>%
-  # Group data so every site with a disturbance has only one entry
-  summarise(Disturbance = first(Disturbance)) %>%
-  # Join disturbance and all site data
-  full_join(select(aspen$data$AllSites, any_of(c("Site", "Community", "numberOfPlotsInCommunity"))), by = join_by(Site)) %>%
-  group_by(Community) %>%
-  # Find out how many plots in a watershed were disturbed
-  summarise(plotsWithDisturbances = sum(!is.na(Disturbance)), 
-            numberOfPlotsInCommunity = first(numberOfPlotsInCommunity),
-            frequency = (first(plotsWithDisturbances) * pi * 4^2)/(first(numberOfPlotsInCommunity) * pi * 4^2)*100)
+::: panel-tabset
+##### Class 1
+```{r}
+compareAgeDensityGraph <- compareAgeDensity %>%
+  filter(class == "Class1") %>%
+  ggplot(aes(x= Community, y = density, fill = as.factor(isAspen))) +
+  geom_boxplot() +
+  scale_fill_viridis(discrete = TRUE) +
+  theme_minimal() +
+  labs(title = "Aspen and Conifer Density Class 1",
+       x = "Community", y = "Density (#/ha)",
+       fill = "Species") + 
+  theme(plot.title = element_text(hjust = 0.5)) +
+  scale_fill_light()  +
+  theme(axis.text.x = element_text(size = 6, angle = 315, hjust = 0)) +
+  ylim(c(0, 20000))
 
-disturbancesGraph <- disturbancesFrequency %>%
-  ggplot(aes(x = Community, y = frequency)) +
-  geom_col() +
+compareAgeDensityGraph
+```
+##### Class 2
+```{r}
+compareAgeDensityGraph <- compareAgeDensity %>%
+  filter(class == "Class2") %>%
+  ggplot(aes(x= Community, y = density, fill = as.factor(isAspen))) +
+  geom_boxplot() +
+  #geom_col(position = position_dodge(preserve = "single"), stat = "identity") +
   theme_minimal() +
-  labs(title = "Disturbance Frequency",
-       x = "Community", y = "Frequency") + 
+  labs(title = "Aspen and Conifer Density Class 2",
+       x = "Community", y = "Density (#/ha)",
+       fill = "Species") + 
   theme(plot.title = element_text(hjust = 0.5)) +
-  theme(axis.text.x = element_text(size = 6, angle = 315))
+  scale_fill_light()  +
+  theme(axis.text.x = element_text(size = 6, angle = 315, hjust = 0))  +
+  ylim(c(0, 20000))
 
-plotly::ggplotly(disturbancesGraph)
+compareAgeDensityGraph
+```
+
+##### Class 3
+```{r}
+compareAgeDensityGraph <- compareAgeDensity %>%
+  filter(class == "Class3") %>%
+  ggplot(aes(x= Community, y = density, fill = as.factor(isAspen))) +
+  geom_boxplot() +
+  #geom_col(position = position_dodge(preserve = "single"), stat = "identity") +
+  theme_minimal() +
+  labs(title = "Aspen and Conifer Density Class 3",
+       x = "Community", y = "Density (#/ha)",
+       fill = "Species") + 
+  theme(plot.title = element_text(hjust = 0.5)) +
+  scale_fill_light()  +
+  theme(axis.text.x = element_text(size = 6, angle = 315, hjust = 0))  +
+  ylim(c(0, 20000))
+
+compareAgeDensityGraph
 ```
 
+##### Class 4
+```{r}
+compareAgeDensityGraph <- compareAgeDensity %>%
+  filter(class == "Class4") %>%
+  ggplot(aes(x= Community, y = density, fill = as.factor(isAspen))) +
+  geom_boxplot() +
+  #geom_col(position = position_dodge(preserve = "single"), stat = "identity") +
+  theme_minimal() +
+  labs(title = "Aspen and Conifer Density Class 4",
+       x = "Community", y = "Density (#/ha)",
+       fill = "Species") + 
+  theme(plot.title = element_text(hjust = 0.5)) +
+  scale_fill_light()  +
+  theme(axis.text.x = element_text(size = 6, angle = 315, hjust = 0))  +
+  ylim(c(0, 20000))
 
-#### Frequency of Disturbances by Plot
+compareAgeDensityGraph
+```
+
+##### Class 5
+```{r}
+compareAgeDensityGraph <- compareAgeDensity %>%
+  filter(class == "Class5") %>%
+  ggplot(aes(x= Community, y = density, fill = as.factor(isAspen))) +
+  geom_boxplot() +
+  #geom_col(position = position_dodge(preserve = "single"), stat = "identity") +
+  theme_minimal() +
+  labs(title = "Aspen and Conifer Density Class 5",
+       x = "Community", y = "Density (#/ha)",
+       fill = "Species") + 
+  theme(plot.title = element_text(hjust = 0.5)) +
+  scale_fill_light()  +
+  theme(axis.text.x = element_text(size = 6, angle = 315, hjust = 0))  +
+  ylim(c(0, 20000))
+
+compareAgeDensityGraph
+```
+
+##### Class 6
+```{r}
+compareAgeDensityGraph <- compareAgeDensity %>%
+  filter(class == "Class6") %>%
+  ggplot(aes(x= Community, y = density, fill = as.factor(isAspen))) +
+  geom_boxplot() +
+  #geom_col(position = position_dodge(preserve = "single"), stat = "identity") +
+  theme_minimal() +
+  labs(title = "Aspen and Conifer Density Class 6",
+       x = "Community", y = "Density (#/ha)",
+       fill = "Species") + 
+  theme(plot.title = element_text(hjust = 0.5)) +
+  scale_fill_light()  +
+  theme(axis.text.x = element_text(size = 6, angle = 315, hjust = 0))  +
+  ylim(c(0, 20000))
+
+compareAgeDensityGraph
+```
+
+:::
+
+#### Frequency of Disturbances
 
 ```{r}
 # Area of plots in which a disturbance occurs / total area of plots (for each community)
@@ -395,6 +721,7 @@ disturbancesGraph <- disturbancesFrequency %>%
 plotly::ggplotly(disturbancesGraph)
 ```
 
+
 #### Frequency of Pests
 
 ```{r}
diff --git a/scripts/visualizations/aspen_visualizations_ucbn.qmd b/scripts/visualizations/aspen_visualizations_ucbn.qmd
new file mode 100644
index 0000000..712a913
--- /dev/null
+++ b/scripts/visualizations/aspen_visualizations_ucbn.qmd
@@ -0,0 +1,845 @@
+---
+title: "Aspen Preliminary Visualizations"
+format: 
+  html:
+    embed-resources: true
+execute: 
+  echo: false
+  message: false
+  warning: false
+---
+
+```{r, message=FALSE}
+library(tidyverse)
+library(khroma)
+library(viridis)
+library(plotly)
+aspen <- aspen::fetchAndWrangleUCBNAspen()
+```
+
+```{r}
+# find number of plots in each watershed
+
+# TODO: update this to Allsites tables once not used sites are removed
+
+# aspen$data$AllSites <- aspen$data$AllSites %>%
+#   filter(Site %in% aspen$data$SiteVisit$Site) %>%
+#       # Find Number of plot in each watershed
+#   group_by(Stratum) %>%
+#   mutate(numberOfPlotsInWatershed = n()) %>%
+#   ungroup() %>%
+#   group_by(Community) %>%
+#   # Find number of plots in each community
+#   mutate(numberOfPlotsInCommunity = n()) %>%
+#   ungroup()
+
+# aspen$data$SiteVisit <- aspen$data$SiteVisit %>%
+#   #filter(Site %in% aspen$data$SiteVisit$Site) %>%
+#       # Find Number of plot in each watershed
+#   group_by(Stand) %>%
+#   mutate(numberOfPlotsInStand = n_distinct(Unique_ID)) %>%
+#   ungroup() 
+
+numberOfPlots <- aspen$data$SiteVisit %>%
+  group_by(Stand) %>%
+  summarise(numberOfPlotsInStand = n_distinct(Unique_ID))
+```
+
+
+# Stand
+
+#### Average Density of Live Aspens
+
+```{r}
+# 4 meter radius of plots
+# area = pi*r^2
+# density = #/ha
+
+# this is finding the density for each plot and then averaging it for the communities
+
+aspenDensity2 <- aspen$data$Observations %>%
+  #left_join(select(aspen$data$SiteVisit, any_of(c("numberOfPlotsInStand", "globalid"))), by = c("parentglobalid" = "globalid")) %>%
+  left_join(select(numberOfPlots, any_of(c("Stand", "numberOfPlotsInStand"))), by = join_by(Stand)) %>%
+  # filter for POTR5
+  filter(SpeciesCode == "POPTRE") %>%
+  # Add up LIVE POTR5 for each row
+  mutate(density = (Class1+Class2+Class3+Class4+Class5)/(pi * 4^2/10000),
+         totalLiveTrees = Class1+Class2+Class3+Class4+Class5) %>%
+  # summarize total POTR5 divided by area converted to ha (area = (number of plots * pi * r^2)/10000 )
+  group_by(Stand) %>%
+  summarise(avgDensity = mean(density),
+            sd = sd(density), 
+            numberOfPlots = n()) %>%
+  # Find values for error bars
+  mutate(ymin = avgDensity - sd,
+         ymax = avgDensity + sd)
+
+aspenDensityGraph <- aspenDensity2 %>%
+  ggplot(aes(x= Stand, y = avgDensity, label = numberOfPlots)) +
+  geom_col() +
+  geom_errorbar(aes(ymin = ymin, ymax = ymax), width = 0.2) +
+  theme_minimal() +
+  labs(title = "Average Aspen Density",
+       x = "Stand", y = "Density (#/ha)") + 
+  theme(plot.title = element_text(hjust = 0.5)) +
+  theme(axis.text.x = element_text(size = 6, angle = 315))
+
+plotly::ggplotly(aspenDensityGraph)
+```
+
+#### Density of Live Aspens Boxplot
+
+```{r}
+aspenDensity <- aspen$data$Observations %>%
+  #left_join(select(aspen$data$SiteVisit, any_of(c("numberOfPlotsInStand", "globalid"))), by = c("parentglobalid" = "globalid")) %>%
+  left_join(select(numberOfPlots, any_of(c("Stand", "numberOfPlotsInStand"))), by = join_by(Stand)) %>%
+  # filter for POTR5
+  filter(SpeciesCode == "POPTRE") %>%
+  # Find density for each plot
+  mutate(density = (Class1+Class2+Class3+Class4+Class5)/((pi * 4^2)/10000)) %>%
+# Filter out communities with not enough points to graph
+group_by(Stand) %>%
+  mutate(numberOfPlots = n()) %>%
+  ungroup() %>%
+  filter(numberOfPlots >2)
+
+
+# aspenDensityGraph <- aspenDensity %>%
+#   ggplot(aes(x= Stand, y = density, fill = Stand, label = numberOfPlots)) +
+#     geom_violin() +
+#     geom_boxplot(width=0.1, color="grey", alpha=0.5) +
+#   stat_summary(fun = "mean", colour = "red", size = 2, geom = "point") +
+#   scale_fill_viridis(discrete = TRUE) +
+#   theme_minimal() +
+#   labs(title = "Aspen Density",
+#        x = "Stand", y = "Density (#/ha)") + 
+#   theme(plot.title = element_text(hjust = 0.5)) +
+#   theme(axis.text.x = element_text(size = 6, angle = 315, hjust = 0.0)) + 
+#   guides(fill="none")
+# 
+# aspenDensityGraph
+```
+
+
+```{r}
+aspenDensityGraph <- aspenDensity %>%
+  ggplot(aes(x= Stand, y = density, label = numberOfPlots)) +
+    #geom_violin() +
+  geom_boxplot(width=0.3, outlier.shape = NA) +
+  geom_point(alpha = 0.5, position=position_jitter(height=.5, width=.25)) +
+  theme_minimal() +
+  labs(title = "Aspen Density",
+       x = "Stand", y = "Density (#/ha)") +
+  theme(plot.title = element_text(hjust = 0.5)) +
+  theme(axis.text.x = element_text(size = 6, angle = 315, hjust = 0.0)) + 
+  guides(fill="none")
+
+aspenDensityGraph
+```
+
+
+```{r}
+fig <- aspenDensity %>%
+  plotly::plot_ly(
+    x = ~Stand,
+    y = ~density,
+    split = ~Stand,
+    color = ~Stand,
+    colors = "Dark2",
+    type = 'violin',
+    box = list(visible = T),
+    meanline = list(visible = T)) 
+
+fig <- fig %>%
+  layout(
+    xaxis = list(
+      title = "Stand"),
+    yaxis = list(
+      title = "Density",
+      zeroline = F,
+      range = list(0, 40000)),
+    showlegend = FALSE)
+
+fig
+```
+
+
+
+#### Average Density of Conifers
+
+```{r}
+# 4 meter radius of plots
+# area = pi*r^2
+# density = #/ha
+
+# this is finding the density for each plot and then averaging it for the communities
+
+
+# TODO: make it so the different plots in the community are aggregated before density is calculated
+aspenDensity2 <- aspen$data$Observations %>%
+  # left_join(select(aspen$data$SiteVisit, any_of(c("numberOfPlotsInStand", "globalid"))), by = c("parentglobalid" = "globalid")) %>%
+  left_join(select(numberOfPlots, any_of(c("Stand", "numberOfPlotsInStand"))), by = join_by(Stand)) %>%
+  # filter for POTR5
+  filter(SpeciesCode != "POPTRE" & !is.na(SpeciesCode)) %>%
+  # Aggregate to get the total number of conifers at each site
+  group_by(Unique_ID, Stand) %>%
+  summarise(Class1 = sum(Class1),
+            Class2 = sum(Class2),
+            Class3 = sum(Class3),
+            Class4 = sum(Class4),
+            Class5 = sum(Class5)) %>%
+  ungroup() %>%
+    # Add up LIVE POTR5 for each row
+  mutate(density = (Class1+Class2+Class3+Class4+Class5)/(pi * 4^2/10000),
+         totalLiveTrees = Class1+Class2+Class3+Class4+Class5) %>%
+  # summarize total POTR5 divided by area converted to ha (area = (number of plots * pi * r^2)/10000 )
+  group_by(Stand) %>%
+  summarise(avgDensity = mean(density),
+            sd = sd(density), 
+            numberOfPlots = n()) %>%
+  # Find values for error bars
+  mutate(ymin = avgDensity - sd,
+         ymax = avgDensity + sd)
+
+aspenDensityGraph <- aspenDensity2 %>%
+  ggplot(aes(x= Stand, y = avgDensity, label = numberOfPlots)) +
+  geom_col() +
+  geom_errorbar(aes(ymin = ymin, ymax = ymax), width = 0.2) +
+  theme_minimal() +
+  labs(title = "Average Conifer Density",
+       x = "Stand", y = "Density (#/ha)") + 
+  theme(plot.title = element_text(hjust = 0.5)) +
+  theme(axis.text.x = element_text(size = 6, angle = 315))
+
+plotly::ggplotly(aspenDensityGraph)
+```
+
+
+
+```{r}
+#### Density of Conifers per Plot
+
+
+# coniferDensity <- aspen$data$Observations %>%
+#   #left_join(select(aspen$data$SiteVisit, any_of(c("numberOfPlotsInStand", "globalid"))), by = c("parentglobalid" = "globalid")) %>%
+#   left_join(select(numberOfPlots, any_of(c("Stand", "numberOfPlotsInStand"))), by = join_by(Stand)) %>%
+#   # filter for POTR5
+#   filter(SpeciesCode != "POPTRE" & !is.na(SpeciesCode)) %>%
+#     # Aggregate to get the total number of conifers at each site
+#   group_by(Unique_ID, Stand) %>%
+#   summarise(Class1 = sum(Class1),
+#             Class2 = sum(Class2),
+#             Class3 = sum(Class3),
+#             Class4 = sum(Class4),
+#             Class5 = sum(Class5)) %>%
+#   ungroup() %>%
+#   # Find density for each plot
+#   mutate(density = (Class1+Class2+Class3+Class4+Class5)/((pi * 4^2)/10000)) %>%
+# # Filter out communities with not enough points to graph
+# group_by(Stand) %>%
+#   mutate(numberOfPlots = n()) %>%
+#   ungroup() %>%
+#   filter(numberOfPlots >2)
+# 
+# 
+# coniferDensityGraph <- coniferDensity %>%
+#   ggplot(aes(x= Stand, y = density, fill = Stand, label = numberOfPlots)) +
+#     geom_violin() +
+#     geom_boxplot(width=0.1, color="grey", alpha=0.2) +
+#   stat_summary(fun = "mean", colour = "red", size = 2, geom = "point") +
+#   scale_fill_viridis(discrete = TRUE) +
+#   theme_minimal() +
+#   labs(title = "Conifer Density",
+#        x = "Stand", y = "Density (#/ha)") + 
+#   theme(plot.title = element_text(hjust = 0.5)) +
+#   theme(axis.text.x = element_text(size = 6, angle = 315, hjust = 0.0)) + 
+#   guides(fill="none")
+# 
+# coniferDensityGraph
+```
+
+#### Density of Aspen vs Conifers
+
+```{r}
+compareDensity <- aspen$data$Observations %>%
+  #left_join(select(aspen$data$SiteVisit, any_of(c("numberOfPlotsInStand", "globalid"))), by = c("parentglobalid" = "globalid")) %>%
+  left_join(select(numberOfPlots, any_of(c("Stand", "numberOfPlotsInStand"))), by = join_by(Stand)) %>%
+    # filter out one species that is not aspen or conifer
+  filter(!is.na(SpeciesCode)) %>%
+  mutate(isAspen = case_when(
+    SpeciesCode == "POPTRE" ~ 'Aspen',
+    .default = 'Conifer'
+  )) %>%
+    # Aggregate to get the total number of conifers at each site
+  group_by(Unique_ID, Stand, isAspen) %>%
+  summarise(Class1 = sum(Class1),
+            Class2 = sum(Class2),
+            Class3 = sum(Class3),
+            Class4 = sum(Class4),
+            Class5 = sum(Class5)) %>%
+  ungroup() %>%
+  # Add up live trees for each row
+  mutate(density = (Class1+Class2+Class3+Class4+Class5)/(pi * 4^2/10000)) %>%
+  group_by(Stand, isAspen) %>%
+  summarise(avgDensity = mean(density),
+            sd = sd(density), 
+            numberOfPlots = n()) %>%
+  # Find values for error bars
+  mutate(ymin = avgDensity - sd,
+         ymax = avgDensity + sd)
+
+
+compareDensityGraph <- compareDensity %>%
+  ggplot(aes(x= Stand, y = avgDensity, fill = as.factor(isAspen), label = numberOfPlots)) +
+  geom_col(position = position_dodge(preserve = "single"), stat = "identity") +
+  geom_errorbar(aes(ymin = ymin, ymax = ymax), width = 0.2, position = position_dodge(0.9)) +
+  theme_minimal() +
+  labs(title = "Aspen and Conifer Density",
+       x = "Stand", y = "Density (#/ha)",
+       fill = "Species") + 
+  theme(plot.title = element_text(hjust = 0.5)) +
+  theme(axis.text.x = element_text(size = 6, angle = 315))  +
+  scale_fill_light()
+
+plotly::ggplotly(compareDensityGraph)
+```
+
+
+#### Density of Aspen vs Conifers per Plot
+
+```{r}
+compareDensity <- aspen$data$Observations %>%
+  #left_join(select(aspen$data$SiteVisit, any_of(c("numberOfPlotsInStand", "globalid"))), by = c("parentglobalid" = "globalid")) %>%
+  left_join(select(numberOfPlots, any_of(c("Stand", "numberOfPlotsInStand"))), by = join_by(Stand)) %>%
+    # filter out one species that is not aspen or conifer
+  filter(!is.na(SpeciesCode)) %>%
+  mutate(isAspen = case_when(
+    SpeciesCode == "POPTRE" ~ 'Aspen',
+    .default = 'Conifer'
+  )) %>% 
+      # Aggregate to get the total number of conifers at each site
+  group_by(Unique_ID, Stand, isAspen) %>%
+  summarise(Class1 = sum(Class1),
+            Class2 = sum(Class2),
+            Class3 = sum(Class3),
+            Class4 = sum(Class4),
+            Class5 = sum(Class5)) %>%
+  ungroup() %>%
+    # Find density for each plot
+  mutate(density = (Class1+Class2+Class3+Class4+Class5)/((pi * 4^2)/10000)) 
+# %>%
+#   filter(Community != "Curl-leaf Mountain-mahogany Shrubland & Woodland Complex")
+
+compareDensityGraph <- compareDensity %>%
+  ggplot(aes(x= Stand, y = density, fill = as.factor(isAspen))) +
+  geom_boxplot() +
+  #geom_col(position = position_dodge(preserve = "single"), stat = "identity") +
+  theme_minimal() +
+  labs(title = "Aspen and Conifer Density",
+       x = "Stand", y = "Density (#/ha)",
+       fill = "Species") + 
+  theme(plot.title = element_text(hjust = 0.5)) +
+  theme(axis.text.x = element_text(size = 6, angle = 315))  +
+  scale_fill_light()  +
+  theme(axis.text.x = element_text(size = 6, angle = 315, hjust = 0))
+
+compareDensityGraph
+```
+
+#### Density of Aspen Age Groups
+
+```{r}
+ageGroupCommunity <- aspen$data$Observations %>%
+  #left_join(select(aspen$data$SiteVisit, any_of(c("numberOfPlotsInStand", "globalid"))), by = c("parentglobalid" = "globalid")) %>%
+  left_join(select(numberOfPlots, any_of(c("Stand", "numberOfPlotsInStand"))), by = join_by(Stand)) %>%
+  # filter for POTR5
+  filter(SpeciesCode == "POPTRE") %>%
+  # Pivot longer to make calculations easier
+  pivot_longer(cols = Class1:Class6, names_to = "class", values_to = "treeCount") %>%
+  mutate(density = treeCount/(pi * 4^2/10000)) %>%
+  # summarize POTR5 for each age group divided by area converted to ha (area = (number of plots * pi * r^2)/10000 )
+  group_by(Stand, class) %>%
+    summarise(avgDensity = mean(density),
+            sd = sd(density), 
+            numberOfPlots = n()) %>%
+  # Find values for error bars
+  mutate(ymin = avgDensity - sd,
+         ymax = avgDensity + sd)
+
+ageGroupCommunityGraph <- ageGroupCommunity %>%
+  ggplot(aes(x= Stand, y = avgDensity, fill = factor(class))) +
+  geom_col(position = position_dodge(preserve = "single"), stat = "identity") +
+  geom_errorbar(aes(ymin = ymin, ymax = ymax), width = 0.3, position = position_dodge(0.9)) +
+  theme_minimal() +
+  labs(title = "Age Class Density",
+       x = "Stand", y = "Density (#/ha)",
+       fill = "Age Class") + 
+  theme(plot.title = element_text(hjust = 0.5)) +
+  scale_fill_light()  +
+  theme(axis.text.x = element_text(size = 6, angle = 315))
+
+plotly::ggplotly(ageGroupCommunityGraph)
+```
+
+
+#### Density of Aspen Age Groups Boxplot
+
+```{r}
+ageGroupCommunity <- aspen$data$Observations %>%
+  #left_join(select(aspen$data$SiteVisit, any_of(c("numberOfPlotsInStand", "globalid"))), by = c("parentglobalid" = "globalid")) %>%
+  left_join(select(numberOfPlots, any_of(c("Stand", "numberOfPlotsInStand"))), by = join_by(Stand)) %>%
+  # filter for POTR5
+  filter(SpeciesCode == "POPTRE") %>%
+  # Pivot longer to make calculations easier
+  pivot_longer(cols = Class1:Class6, names_to = "class", values_to = "treeCount") %>%
+  mutate(density = treeCount/(pi * 4^2/10000)) 
+# %>%
+#   filter(Community != "Curl-leaf Mountain-mahogany Shrubland & Woodland Complex")
+
+
+ageGroupCommunityGraph <- ageGroupCommunity %>%
+  ggplot(aes(x= Stand, y = density, fill = factor(class))) +
+  geom_boxplot() +
+  theme_minimal() +
+  labs(title = "Age Class Density",
+       x = "Stand", y = "Density (#/ha)",
+       fill = "Age Class") + 
+  theme(plot.title = element_text(hjust = 0.5)) +
+  scale_fill_light()  +
+  theme(axis.text.x = element_text(size = 6, angle = 315, hjust = 0))
+
+ageGroupCommunityGraph
+```
+
+#### Density of Conifer Age Groups
+
+```{r}
+ageGroupCommunity <- aspen$data$Observations %>%
+  #left_join(select(aspen$data$SiteVisit, any_of(c("numberOfPlotsInStand", "globalid"))), by = c("parentglobalid" = "globalid")) %>%
+  left_join(select(numberOfPlots, any_of(c("Stand", "numberOfPlotsInStand"))), by = join_by(Stand)) %>%
+  # filter for POTR5
+  filter(SpeciesCode != "POPTRE" & !is.na(SpeciesCode)) %>%
+  # Pivot longer to make calculations easier
+  pivot_longer(cols = Class1:Class6, names_to = "class", values_to = "treeCount") %>%
+  mutate(density = treeCount/(pi * 4^2/10000)) 
+# %>%
+#   # summarize POTR5 for each age group divided by area converted to ha (area = (number of plots * pi * r^2)/10000 )
+#   group_by(Community, class) %>%
+#     summarise(avgDensity = mean(density),
+#             sd = sd(density), 
+#             numberOfPlots = n()) %>%
+#   # Find values for error bars
+#   mutate(ymin = avgDensity - sd,
+#          ymax = avgDensity + sd)
+
+ageGroupCommunityGraph <- ageGroupCommunity %>%
+  ggplot(aes(x= Stand, y = density, fill = factor(class))) +
+  geom_boxplot() +
+  # geom_col(position = position_dodge(preserve = "single"), stat = "identity") +
+  # geom_errorbar(aes(ymin = ymin, ymax = ymax), width = 0.5, position = position_dodge(0.9)) +
+  theme_minimal() +
+  labs(title = "Age Class Density",
+       x = "Stand", y = "Density (#/ha)",
+       fill = "Age Class") + 
+  theme(plot.title = element_text(hjust = 0.5)) +
+  scale_fill_light()  +
+  theme(axis.text.x = element_text(size = 6, angle = 315))
+
+ageGroupCommunityGraph
+```
+
+
+#### Density of Aspen vs Conifer Age Groups
+
+```{r}
+compareAgeDensity <- aspen$data$Observations %>%
+  #left_join(select(aspen$data$SiteVisit, any_of(c("numberOfPlotsInStand", "globalid"))), by = c("parentglobalid" = "globalid")) %>%
+  left_join(select(numberOfPlots, any_of(c("Stand", "numberOfPlotsInStand"))), by = join_by(Stand)) %>%
+    # filter out one species that is not aspen or conifer
+  filter(!is.na(SpeciesCode)) %>%
+  mutate(isAspen = case_when(
+    SpeciesCode == "POPTRE" ~ 'Aspen',
+    .default = 'Conifer'
+  )) %>%
+    # Find density for each plot
+  mutate(density = (Class1+Class2+Class3+Class4+Class5)/((pi * 4^2)/10000)) %>%  
+# Pivot longer to make calculations easier
+  pivot_longer(cols = Class1:Class6, names_to = "class", values_to = "treeCount") %>%
+  mutate(density = treeCount/(pi * 4^2/10000))
+```
+
+::: panel-tabset
+##### Class 1
+```{r}
+compareAgeDensityGraph <- compareAgeDensity %>%
+  filter(class == "Class1") %>%
+  ggplot(aes(x= Stand, y = density, fill = as.factor(isAspen))) +
+  geom_boxplot() +
+  scale_fill_viridis(discrete = TRUE) +
+  theme_minimal() +
+  labs(title = "Aspen and Conifer Density Class 1",
+       x = "Stand", y = "Density (#/ha)",
+       fill = "Species") + 
+  theme(plot.title = element_text(hjust = 0.5)) +
+  scale_fill_light()  +
+  theme(axis.text.x = element_text(size = 6, angle = 315, hjust = 0)) +
+  ylim(c(0, 20000))
+
+compareAgeDensityGraph
+```
+##### Class 2
+```{r}
+compareAgeDensityGraph <- compareAgeDensity %>%
+  filter(class == "Class2") %>%
+  ggplot(aes(x= Stand, y = density, fill = as.factor(isAspen))) +
+  geom_boxplot() +
+  #geom_col(position = position_dodge(preserve = "single"), stat = "identity") +
+  theme_minimal() +
+  labs(title = "Aspen and Conifer Density Class 2",
+       x = "Stand", y = "Density (#/ha)",
+       fill = "Species") + 
+  theme(plot.title = element_text(hjust = 0.5)) +
+  scale_fill_light()  +
+  theme(axis.text.x = element_text(size = 6, angle = 315, hjust = 0))  +
+  ylim(c(0, 20000))
+
+compareAgeDensityGraph
+```
+
+##### Class 3
+```{r}
+compareAgeDensityGraph <- compareAgeDensity %>%
+  filter(class == "Class3") %>%
+  ggplot(aes(x= Stand, y = density, fill = as.factor(isAspen))) +
+  geom_boxplot() +
+  #geom_col(position = position_dodge(preserve = "single"), stat = "identity") +
+  theme_minimal() +
+  labs(title = "Aspen and Conifer Density Class 3",
+       x = "Stand", y = "Density (#/ha)",
+       fill = "Species") + 
+  theme(plot.title = element_text(hjust = 0.5)) +
+  scale_fill_light()  +
+  theme(axis.text.x = element_text(size = 6, angle = 315, hjust = 0))  +
+  ylim(c(0, 20000))
+
+compareAgeDensityGraph
+```
+
+##### Class 4
+```{r}
+compareAgeDensityGraph <- compareAgeDensity %>%
+  filter(class == "Class4") %>%
+  ggplot(aes(x= Stand, y = density, fill = as.factor(isAspen))) +
+  geom_boxplot() +
+  #geom_col(position = position_dodge(preserve = "single"), stat = "identity") +
+  theme_minimal() +
+  labs(title = "Aspen and Conifer Density Class 4",
+       x = "Stand", y = "Density (#/ha)",
+       fill = "Species") + 
+  theme(plot.title = element_text(hjust = 0.5)) +
+  scale_fill_light()  +
+  theme(axis.text.x = element_text(size = 6, angle = 315, hjust = 0))  +
+  ylim(c(0, 20000))
+
+compareAgeDensityGraph
+```
+
+##### Class 5
+```{r}
+compareAgeDensityGraph <- compareAgeDensity %>%
+  filter(class == "Class5") %>%
+  ggplot(aes(x= Stand, y = density, fill = as.factor(isAspen))) +
+  geom_boxplot() +
+  #geom_col(position = position_dodge(preserve = "single"), stat = "identity") +
+  theme_minimal() +
+  labs(title = "Aspen and Conifer Density Class 5",
+       x = "Stand", y = "Density (#/ha)",
+       fill = "Species") + 
+  theme(plot.title = element_text(hjust = 0.5)) +
+  scale_fill_light()  +
+  theme(axis.text.x = element_text(size = 6, angle = 315, hjust = 0))  +
+  ylim(c(0, 20000))
+
+compareAgeDensityGraph
+```
+
+##### Class 6
+```{r}
+compareAgeDensityGraph <- compareAgeDensity %>%
+  filter(class == "Class6") %>%
+  ggplot(aes(x= Stand, y = density, fill = as.factor(isAspen))) +
+  geom_boxplot() +
+  #geom_col(position = position_dodge(preserve = "single"), stat = "identity") +
+  theme_minimal() +
+  labs(title = "Aspen and Conifer Density Class 6",
+       x = "Stand", y = "Density (#/ha)",
+       fill = "Species") + 
+  theme(plot.title = element_text(hjust = 0.5)) +
+  scale_fill_light()  +
+  theme(axis.text.x = element_text(size = 6, angle = 315, hjust = 0))  +
+  ylim(c(0, 20000))
+
+compareAgeDensityGraph
+```
+
+:::
+
+#### Frequency of Disturbances
+
+```{r}
+# Area of plots in which a disturbance occurs / total area of plots (for each community)
+
+disturbancesFrequency <- aspen$data$Disturbances %>%
+  group_by(Unique_ID, Stand) %>%
+  # Group data so every site with a disturbance has only one entry
+  summarise(Disturbance = first(Disturbance)) %>%
+  # Join disturbance and all site data
+  left_join(select(numberOfPlots, any_of(c("Unique_ID", "Stand", "numberOfPlotsInStand"))), by = join_by(Stand)) %>%
+  group_by(Stand) %>%
+  # Find out how many plots in a watershed were disturbed
+  summarise(plotsWithDisturbances = sum(!is.na(Disturbance)), 
+            numberOfPlotsInStand = first(numberOfPlotsInStand),
+            frequency = (first(plotsWithDisturbances) * pi * 4^2)/(first(numberOfPlotsInStand) * pi * 4^2)*100)
+
+disturbancesGraph <- disturbancesFrequency %>%
+  ggplot(aes(x = Stand, y = frequency)) +
+  geom_col() +
+  theme_minimal() +
+  labs(title = "Disturbance Frequency",
+       x = "Stand", y = "Frequency") + 
+  theme(plot.title = element_text(hjust = 0.5)) +
+  theme(axis.text.x = element_text(size = 6, angle = 315))
+
+plotly::ggplotly(disturbancesGraph)
+```
+
+
+#### Frequency of Pests
+
+```{r}
+# Area of plots in which a disturbance occurs / total area of plots (for each community)
+
+pestFrequency <- aspen$data$Pests %>%
+  group_by(Unique_ID, Stand) %>%
+  # Group data so every site with a disturbance has only one entry
+  summarise(Pest = first(Pest)) %>%
+  # Join disturbance and all site data
+  left_join(select(numberOfPlots, any_of(c("Unique_ID", "Stand", "numberOfPlotsInStand"))), by = join_by(Stand)) %>%
+  group_by(Stand) %>%
+  # Find out how many plots in a watershed were disturbed
+  summarise(plotsWithPests = sum(!is.na(Pest)), 
+            numberOfPlotsInStand = first(numberOfPlotsInStand),
+            frequency = (first(plotsWithPests) * pi * 4^2)/(first(numberOfPlotsInStand) * pi * 4^2)*100)
+
+pestGraph <- pestFrequency %>%
+  ggplot(aes(x = Stand, y = frequency)) +
+  geom_col() +
+  theme_minimal() +
+  labs(title = "Pest Frequency",
+       x = "Stand", y = "Frequency") + 
+  theme(plot.title = element_text(hjust = 0.5)) +
+  theme(axis.text.x = element_text(size = 6, angle = 315))
+
+plotly::ggplotly(pestGraph)
+```
+
+# Park
+
+#### Density of Live Aspens
+
+```{r}
+# 4 meter radius of plots
+# area = pi*r^2
+# denisty = #/ha
+
+
+  
+aspenParkDensity <- aspen$data$Observations %>%
+  # Find number of plot in each park
+  group_by(Park) %>%
+  mutate(numberOfPlots = n_distinct(Unique_ID)) %>%
+  ungroup() %>%
+  # filter for POTR5
+  filter(SpeciesCode == "POPTRE") %>%
+  # Add up LIVE POTR5 for each row
+  mutate(totalLiveTrees = Class1+Class2+Class3+Class4+Class5) %>%
+  # summarize total POTR5 divided by area converted to ha (area = (number of plots * pi * r^2)/10000 )
+  group_by(Park) %>%
+  summarise(density = sum(totalLiveTrees)/((first(numberOfPlots) * pi * 4^2)/10000),
+            sumLiveTrees = sum(totalLiveTrees),
+            numberOfPlots = first(numberOfPlots))
+
+
+aspenParkDensityGraph <- aspenParkDensity %>%
+  ggplot(aes(x= Park, y = density)) +
+  geom_col() +
+  theme_minimal() +
+  labs(title = "Aspen Density",
+       x = "Park", y = "Density (#/ha)") + 
+  theme(plot.title = element_text(hjust = 0.5))
+
+plotly::ggplotly(aspenParkDensityGraph)
+```
+
+#### Density of Conifers
+
+```{r}
+# 4 meter radius of plots
+# area = pi*r^2
+# denisty = #/ha
+
+
+  
+coniferParkDensity <- aspen$data$Observations %>%
+  # Find number of plot in each park
+  group_by(Park) %>%
+  mutate(numberOfPlots = n_distinct(Unique_ID)) %>%
+  ungroup() %>%
+  # filter for POTR5
+  filter(SpeciesCode != "POPTRE" & SpeciesCode != "BEUC2") %>%
+  # Add up LIVE POTR5 for each row
+  mutate(totalLiveTrees = Class1+Class2+Class3+Class4+Class5) %>%
+  # summarize total POTR5 divided by area converted to ha (area = (number of plots * pi * r^2)/10000 )
+  group_by(Park) %>%
+  summarise(density = sum(totalLiveTrees)/((first(numberOfPlots) * pi * 4^2)/10000),
+            sumLiveTrees = sum(totalLiveTrees),
+            numberOfPlots = first(numberOfPlots))
+
+
+coniferParkDensityGraph <- coniferParkDensity %>%
+  ggplot(aes(x= Park, y = density)) +
+  geom_col() +
+  theme_minimal() +
+  labs(title = "Conifer Density",
+       x = "Park", y = "Density (#/ha)") + 
+  theme(plot.title = element_text(hjust = 0.5))
+
+plotly::ggplotly(coniferParkDensityGraph)
+```
+
+
+#### Density of Aspen vs Conifers
+
+```{r}
+compareDensity <- aspen$data$Observations %>%
+    # Find number of plot in each park
+  group_by(Park) %>%
+  mutate(numberOfPlots = n_distinct(Unique_ID)) %>%
+  ungroup() %>%
+  # filter out one species that is not aspen or conifer
+  filter(SpeciesCode != "BEUC2") %>%
+  mutate(isAspen = case_when(
+    SpeciesCode == "POPTRE" ~ 'Aspen',
+    .default = 'Conifer'
+  )) %>%
+  # Add up live trees for each row
+  mutate(liveTrees = Class1+Class2+Class3+Class4+Class5) %>%
+  # summarize total POTR5 divided by area converted to ha (area = (number of plots * pi * r^2)/10000 )
+  group_by(Park, isAspen) %>%
+  summarise(density = sum(liveTrees)/((first(numberOfPlots) * pi * 4^2)/10000),
+            totalLiveTrees = sum(liveTrees),
+            numberOfPlots = first(numberOfPlots))
+
+
+compareDensityGraph <- compareDensity %>%
+  ggplot(aes(x= Park, y = density, fill = as.factor(isAspen))) +
+  geom_col(position = position_dodge(preserve = "single"), stat = "identity") +
+  theme_minimal() +
+  labs(title = "Aspen and Conifer Density",
+       x = "Park", y = "Density (#/ha)",
+       fill = "Species") + 
+  theme(plot.title = element_text(hjust = 0.5)) +
+  scale_fill_light()
+
+plotly::ggplotly(compareDensityGraph)
+```
+
+#### Density of Aspen Age Groups
+
+```{r}
+ageGroupPark <- aspen$data$Observations %>%
+  # Find number of plot in each park
+  group_by(Park) %>%
+  mutate(numberOfPlots = n_distinct(Unique_ID)) %>%
+  ungroup() %>%
+  # filter for POTR5
+  filter(SpeciesCode == "POPTRE") %>%
+  # Pivot longer to make calculations easier
+  pivot_longer(cols = Class1:Class6, names_to = "class", values_to = "treeCount") %>%
+  # summarize POTR5 for each age group divided by area converted to ha (area = (number of plots * pi * r^2)/10000 )
+  group_by(Park, class) %>%
+  summarise(density = sum(treeCount)/((first(numberOfPlots) * pi * 4^2)/10000),
+            sumLiveTrees = sum(treeCount),
+            numberOfPlots = first(numberOfPlots)) 
+
+
+ageGroupParkGraph <- ageGroupPark %>%
+  ggplot(aes(x= Park, y = density, fill = factor(class))) +
+  geom_col(position = position_dodge(preserve = "single"), stat = "identity") +
+  theme_minimal() +
+  labs(title = "Age Class Density",
+       x = "Park", y = "Density (#/ha)",
+       fill = "Age Class") + 
+  theme(plot.title = element_text(hjust = 0.5)) +
+  scale_fill_light()
+
+plotly::ggplotly(ageGroupParkGraph)
+```
+
+
+#### Frequency of Disturbances
+
+```{r}
+# Area of plots in which a disturbance occurs / total area of plots (for each park)
+
+disturbancesFrequency <- aspen$data$Disturbances %>%
+  group_by(Park, Unique_ID) %>%
+  # Group data so every site with a disturbance has only one entry
+  summarise(Disturbance = first(Disturbance)) %>%
+  # # Join disturbance and all site data
+  # full_join(select(aspen$data$AllSites, any_of(c("Site", "Park"))), by = join_by(Site)) %>%
+  group_by(Park) %>%
+  # Find out how many plots in a watershed were disturbed
+  summarise(plotsWithDisturbances = sum(!is.na(Disturbance)), 
+            numberOfPlotsInPark = n(),
+            frequency = (first(plotsWithDisturbances) * pi * 4^2)/(first(numberOfPlotsInPark) * pi * 4^2)*100)
+
+disturbancesGraph <- disturbancesFrequency %>%
+  ggplot(aes(x = Park, y = frequency)) +
+  geom_col() +
+  theme_minimal() +
+  labs(title = "Disturbance Frequency",
+       x = "Park", y = "Frequency") + 
+  theme(plot.title = element_text(hjust = 0.5))
+
+plotly::ggplotly(disturbancesGraph)
+```
+
+#### Frequency of Pests
+
+```{r}
+# Area of plots in which a disturbance occurs / total area of plots (for each park)
+
+pestFrequency <- aspen$data$Pests %>%
+  group_by(Park, Unique_ID) %>%
+  # Group data so every site with a disturbance has only one entry
+  summarise(Pest = first(Pest)) %>%
+  # Join disturbance and all site data
+  # full_join(select(aspen$data$AllSites, any_of(c("Site", "Park"))), by = join_by(Site)) %>%
+  group_by(Park) %>%
+  # Find out how many plots in a watershed were disturbed
+  summarise(plotsWithPests = sum(!is.na(Pest)), 
+            numberOfPlotsInPark = n(),
+            frequency = (first(plotsWithPests) * pi * 4^2)/(first(numberOfPlotsInPark) * pi * 4^2)*100)
+
+pestGraph <- pestFrequency %>%
+  ggplot(aes(x = Park, y = frequency)) +
+  geom_col() +
+  theme_minimal() +
+  labs(title = "Pest Frequency",
+       x = "Park", y = "Frequency") + 
+  theme(plot.title = element_text(hjust = 0.5))
+
+plotly::ggplotly(pestGraph)
+```
+
diff --git a/scripts/visualizations/qcResults.qmd b/scripts/visualizations/qcResults.qmd
new file mode 100644
index 0000000..5932777
--- /dev/null
+++ b/scripts/visualizations/qcResults.qmd
@@ -0,0 +1,78 @@
+---
+title: "QC Results"
+format: 
+  html:
+    embed-resources: true
+execute: 
+  echo: false
+---
+
+
+```{r, message=FALSE, echo=FALSE, include=FALSE}
+library(tidyverse)
+library(aspen)
+
+# Import Data
+# 
+# data_dir = "data"
+# 
+# # Extract the path of the first Access database in the data folder folder of the project directory
+# # Or the database path can be input manually
+# #database_dir <- common::file.find(here::here("data"), "*.accdb", up = 0)[1]
+# 
+# fiveneedlepine::loadPine(database_dir)
+aspen <- fetchAndWrangleUCBNAspen()
+```
+
+# Aspen QC
+
+##### Check that there is at least one tree for every observation entry and flag entries with more than 250 trees
+```{r}
+if(nrow(aspen:::missingTreeUCBNQC(aspen)) == 0){
+  cat('No errors')
+} else{
+  rmarkdown::paged_table(aspen:::missingTreeUCBNQC(aspen))
+}
+```
+
+
+
+##### Check that no entries in the observation table have a SpeciesCode that is NA or missing
+```{r}
+if(nrow(aspen:::treeSpeciesUCBNQC(aspen)) == 0){
+  cat('No errors')
+} else{
+  rmarkdown::paged_table(aspen:::treeSpeciesUCBNQC(aspen))
+}
+```
+
+
+##### Check that unknown species entries do not have any live trees
+```{r}
+if(nrow(aspen:::unknownSpeciesUCBNQC(aspen)) == 0){
+  cat('No errors')
+} else{
+  rmarkdown::paged_table(aspen:::unknownSpeciesUCBNQC(aspen))
+}
+```
+
+
+##### Check that all plots have at least one live aspen for each visit
+```{r}
+if(nrow(aspen:::checkAspenUCBNQC(aspen)) == 0){
+  cat('No errors')
+} else{
+  rmarkdown::paged_table(aspen:::checkAspenUCBNQC(aspen))
+}
+```
+
+
+##### Flag large changes in number of tree between years
+```{r}
+# if(nrow(fiveneedlepine:::treeChangeUCBNQC(aspen)) == 0){
+#   cat('No errors')
+# } else{
+#   rmarkdown::paged_table(fiveneedlepine:::treeChangeUCBNQC(aspen))
+# }
+```
+
diff --git a/tests/testthat/test-aspen_qc.R b/tests/testthat/test-aspen_qc.R
index 56246d8..f3b2e66 100644
--- a/tests/testthat/test-aspen_qc.R
+++ b/tests/testthat/test-aspen_qc.R
@@ -5,9 +5,12 @@
   # check that the return type from function is correct
   # make fake data and then test number of rows?? - nah probs not
 
+
+data <- fetchAndWrangleMOJNAspen()
+
 test_that("Test that missingTreeQC() works", {
   # Run function
-  returnData <- missingTreeQC()
+  returnData <- missingTreeQC(data)
 
   # Compare expected and actual column names
   actual_cols <- names(returnData)
@@ -20,7 +23,7 @@ test_that("Test that missingTreeQC() works", {
 
 test_that("Test that treeSpeciesQC() works", {
   # Run function
-  returnData <- treeSpeciesQC()
+  returnData <- treeSpeciesQC(data)
 
   # Compare expected and actual column names
   actual_cols <- names(returnData)
@@ -33,7 +36,7 @@ test_that("Test that treeSpeciesQC() works", {
 
 test_that("Test that unknownSpeciesQC() works", {
   # Run function
-  returnData <- unknownSpeciesQC()
+  returnData <- unknownSpeciesQC(data)
 
   # Compare expected and actual column names
   actual_cols <- names(returnData)
@@ -46,11 +49,11 @@ test_that("Test that unknownSpeciesQC() works", {
 
 test_that("Test that checkAspenQC() works", {
   # Run function
-  returnData <- checkAspenQC()
+  returnData <- checkAspenQC(data)
 
   # Compare expected and actual column names
   actual_cols <- names(returnData)
-  expected_cols <- c("Park", "Site","VisitType", "VisitDate", "SpeciesCode", "Class1", "Class2", "Class3", "Class4", "Class5", "Class6")
+  expected_cols <- c("Park", "Site","VisitType", "VisitDate", "SpeciesCode")
   expect_equal(actual_cols, expected_cols)
 
   # Check that function returns a database
diff --git a/tests/testthat/test-utils.R b/tests/testthat/test-utils.R
index 71b25fb..310a53a 100644
--- a/tests/testthat/test-utils.R
+++ b/tests/testthat/test-utils.R
@@ -6,18 +6,18 @@
   # check that each data table has the correct number of columns and that the columns have the right names
   # check that the return type from function is correct
 
-test_that("Test that fetchAndWrangleAspen() works", {
+test_that("Test that fetchAndWrangleMOJNAspen() works", {
   # Compare expected and actual names in return object
-  actual_names <- names(fetchAndWrangleAspen())
+  actual_names <- names(fetchAndWrangleMOJNAspen())
   expected_names <- c("data", "metadata")
   expect_equal(actual_names, expected_names)
 
   # Compare expected and actual names of the aspen data frames
-  actual_cols <- names(fetchAndWrangleAspen()$data)
+  actual_cols <- names(fetchAndWrangleMOJNAspen()$data)
   expected_cols <- c("AllSites", "SiteVisit", "Disturbances", "Observations", "Pests")
   expect_equal(actual_cols, expected_cols)
 
   # Check that the first object in the data list is a database
-  returnType <- fetchAndWrangleAspen()$data[[1]]
+  returnType <- fetchAndWrangleMOJNAspen()$data[[1]]
   expect_s3_class(returnType, "data.frame")
 })

From 136b84b9df9e3594bc57999307be4a57d586967d Mon Sep 17 00:00:00 2001
From: Irene Foster <ifoster@nps.gov>
Date: Mon, 24 Jun 2024 13:35:46 -0700
Subject: [PATCH 10/11] adding README

---
 R/utils.R  |  3 ---
 README.Rmd | 42 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 42 insertions(+), 3 deletions(-)
 create mode 100644 README.Rmd

diff --git a/R/utils.R b/R/utils.R
index 1766684..8b6cf67 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -15,7 +15,6 @@ pkg_globals <- new.env(parent = emptyenv())
 #' @export
 
 # TODO: add variables that were joined to data tables to each metadata table
-# TODO should data be loaded from inside the function like now or input as a variable
 fetchAndWrangleMOJNAspen <- function(
     aspen_url = "https://services1.arcgis.com/fBc8EJBxQRMcHlei/arcgis/rest/services/MOJN_Aspen_Test_Visit_NonSpatial_gdb/FeatureServer",
                                  site_url =  "https://services1.arcgis.com/fBc8EJBxQRMcHlei/arcgis/rest/services/AspenSites2/FeatureServer",
@@ -96,8 +95,6 @@ fetchAndWrangleMOJNAspen <- function(
 #' @return A list of data frames and metadata
 #' @export
 
-
-# TODO fix this
 fetchAndWrangleUCBNAspen <- function(
     aspen_url = "https://services1.arcgis.com/fBc8EJBxQRMcHlei/arcgis/rest/services/service_4d6343e9204142928351c52c6f1362c5/FeatureServer",
                                  site_url =  NULL,
diff --git a/README.Rmd b/README.Rmd
new file mode 100644
index 0000000..9195463
--- /dev/null
+++ b/README.Rmd
@@ -0,0 +1,42 @@
+---
+title: "Aspen Guide"
+output: github_document
+date: "2024-06-20"
+---
+
+The goal of the aspen package is to provide data QC, visualization, and publication assistance for MOJN aspen data.
+
+### Installation
+
+If you already have a GitHub token for RStudio run the code below to download the aspen package. If you haven’t generated a GitHub token for RStudio, you can follow the guide [here](https://frbcesab.github.io/rsetup/chapters/github-token.html) to set it up and then run the following code to download the aspen package.
+
+```{r, eval=FALSE}
+# install.packages("devtools")
+devtools::install_github(repo = "nationalparkservice/mojn-aspen-rpackage")
+```
+
+
+### Metadata and AGOL Password
+
+Before running any of the functions in the package you should check that the data in you aspen AGOL database is filled out. Instructions for filling out AGOL metadata can be found [here](https://github.com/nationalparkservice/imd-fetchagol-package. 
+
+Before running the functions, it is also recommended that you save the password to your AGOL headless account using the keyring package in R. To add headless account information to the default keyring follow the steps below. The code should only have to be run once.
+
+
+```{r, eval = FALSE}
+# Run the function below. Change the username field to the username of your headless account and input the password when prompted
+keyring::key_set(service = "AGOL", username = "USERNAME", prompt = "Password for headless account: ")
+
+# To check the key was saved run code below, filling in the username to your headless account
+keyring::key_get(service = "AGOL", username = "USERNAME")
+```
+
+### Functions
+
+A short summary of what this package contains. For more information see function documentation.
+
+- fetchAndWrangleMOJNAspen(): load MOJN aspen data from AGOL into R and perform some basic data wrangling
+- fetchAndWrangleUCBNAspen(): load UCBN aspen data from AGOL into R and perform some basic data wrangling
+- writeAspen(): write aspen data and metadata to CSVs
+
+In addition to these functions the aspen package has some quality control functions, a quality control script, and a visualization script. To learn more about the functions see package documentation. You can find the scripts in the script folder of the [GitHub repo](https://github.com/nationalparkservice/mojn-aspen-rpackage) or if you cloned the GitHub repo to your desktop you can open the files from there. Download, open, and then run the .qmd file to see the results. 

From 80dda46ced6a316fb1cd0aa978b9878bc496b697 Mon Sep 17 00:00:00 2001
From: Irene Foster <ifoster@nps.gov>
Date: Mon, 24 Jun 2024 13:56:58 -0700
Subject: [PATCH 11/11] rename function from fetch to load

---
 NAMESPACE                                     |  4 +-
 R/utils.R                                     |  6 +-
 README.Rmd                                    |  4 +-
 README.md                                     | 58 +++++++++++++++++++
 ...OJNAspen.Rd => loadAndWrangleMOJNAspen.Rd} |  6 +-
 ...CBNAspen.Rd => loadAndWrangleUCBNAspen.Rd} |  6 +-
 .../visualizations/aspen_visualizations.qmd   | 37 ------------
 .../aspen_visualizations_ucbn.qmd             | 22 -------
 tests/testthat/test-aspen_qc.R                |  2 +-
 tests/testthat/test-utils.R                   | 12 ++--
 10 files changed, 78 insertions(+), 79 deletions(-)
 create mode 100644 README.md
 rename man/{fetchAndWrangleMOJNAspen.Rd => loadAndWrangleMOJNAspen.Rd} (89%)
 rename man/{fetchAndWrangleUCBNAspen.Rd => loadAndWrangleUCBNAspen.Rd} (88%)

diff --git a/NAMESPACE b/NAMESPACE
index 7c1f1ae..8bfd85f 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -1,7 +1,7 @@
 # Generated by roxygen2: do not edit by hand
 
-export(fetchAndWrangleMOJNAspen)
-export(fetchAndWrangleUCBNAspen)
+export(loadAndWrangleMOJNAspen)
+export(loadAndWrangleUCBNAspen)
 export(writeAspen)
 import(dplyr)
 import(fetchagol)
diff --git a/R/utils.R b/R/utils.R
index 8b6cf67..cbbbee2 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -15,7 +15,7 @@ pkg_globals <- new.env(parent = emptyenv())
 #' @export
 
 # TODO: add variables that were joined to data tables to each metadata table
-fetchAndWrangleMOJNAspen <- function(
+loadAndWrangleMOJNAspen <- function(
     aspen_url = "https://services1.arcgis.com/fBc8EJBxQRMcHlei/arcgis/rest/services/MOJN_Aspen_Test_Visit_NonSpatial_gdb/FeatureServer",
                                  site_url =  "https://services1.arcgis.com/fBc8EJBxQRMcHlei/arcgis/rest/services/AspenSites2/FeatureServer",
                                  agol_username = "mojn_data") {
@@ -41,7 +41,6 @@ fetchAndWrangleMOJNAspen <- function(
   raw_data <- fetchagol::cleanData(raw_data)
 
 
-  # TODO Figure out what to do with metadata, in iu its left empty/fix metadata
   flattened_data$metadata <- raw_data$metadata
 
   # Add optional second database to flattened data
@@ -95,7 +94,7 @@ fetchAndWrangleMOJNAspen <- function(
 #' @return A list of data frames and metadata
 #' @export
 
-fetchAndWrangleUCBNAspen <- function(
+loadAndWrangleUCBNAspen <- function(
     aspen_url = "https://services1.arcgis.com/fBc8EJBxQRMcHlei/arcgis/rest/services/service_4d6343e9204142928351c52c6f1362c5/FeatureServer",
                                  site_url =  NULL,
                                  agol_username = "mojn_data") {
@@ -124,7 +123,6 @@ fetchAndWrangleUCBNAspen <- function(
   if(!is.null(site_url)) {
     flattened_data$data$AllSites <- raw_data$data$AllSites
 
-    # TODO Figure out what to do with metadata, in iu its left empty/fix metadata
     flattened_data$metadata <- raw_data$metadata
 
     # Join background site information with other tables and add background information metadata to tables it was added to
diff --git a/README.Rmd b/README.Rmd
index 9195463..c885ad2 100644
--- a/README.Rmd
+++ b/README.Rmd
@@ -35,8 +35,8 @@ keyring::key_get(service = "AGOL", username = "USERNAME")
 
 A short summary of what this package contains. For more information see function documentation.
 
-- fetchAndWrangleMOJNAspen(): load MOJN aspen data from AGOL into R and perform some basic data wrangling
-- fetchAndWrangleUCBNAspen(): load UCBN aspen data from AGOL into R and perform some basic data wrangling
+- loadAndWrangleMOJNAspen(): load MOJN aspen data from AGOL into R and perform some basic data wrangling
+- loadAndWrangleUCBNAspen(): load UCBN aspen data from AGOL into R and perform some basic data wrangling
 - writeAspen(): write aspen data and metadata to CSVs
 
 In addition to these functions the aspen package has some quality control functions, a quality control script, and a visualization script. To learn more about the functions see package documentation. You can find the scripts in the script folder of the [GitHub repo](https://github.com/nationalparkservice/mojn-aspen-rpackage) or if you cloned the GitHub repo to your desktop you can open the files from there. Download, open, and then run the .qmd file to see the results. 
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..9b385b2
--- /dev/null
+++ b/README.md
@@ -0,0 +1,58 @@
+Aspen Guide
+================
+2024-06-20
+
+The goal of the aspen package is to provide data QC, visualization, and
+publication assistance for MOJN aspen data.
+
+### Installation
+
+If you already have a GitHub token for RStudio run the code below to
+download the aspen package. If you haven’t generated a GitHub token for
+RStudio, you can follow the guide
+[here](https://frbcesab.github.io/rsetup/chapters/github-token.html) to
+set it up and then run the following code to download the aspen package.
+
+``` r
+# install.packages("devtools")
+devtools::install_github(repo = "nationalparkservice/mojn-aspen-rpackage")
+```
+
+### Metadata and AGOL Password
+
+Before running any of the functions in the package you should check that
+the data in you aspen AGOL database is filled out. Instructions for
+filling out AGOL metadata can be found
+\[here\](<https://github.com/nationalparkservice/imd-fetchagol-package>.
+
+Before running the functions, it is also recommended that you save the
+password to your AGOL headless account using the keyring package in R.
+To add headless account information to the default keyring follow the
+steps below. The code should only have to be run once.
+
+``` r
+# Run the function below. Change the username field to the username of your headless account and input the password when prompted
+keyring::key_set(service = "AGOL", username = "USERNAME", prompt = "Password for headless account: ")
+
+# To check the key was saved run code below, filling in the username to your headless account
+keyring::key_get(service = "AGOL", username = "USERNAME")
+```
+
+### Functions
+
+A short summary of what this package contains. For more information see
+function documentation.
+
+- loadAndWrangleMOJNAspen(): load MOJN aspen data from AGOL into R and
+  perform some basic data wrangling
+- loadAndWrangleUCBNAspen(): load UCBN aspen data from AGOL into R and
+  perform some basic data wrangling
+- writeAspen(): write aspen data and metadata to CSVs
+
+In addition to these functions the aspen package has some quality
+control functions, a quality control script, and a visualization script.
+To learn more about the functions see package documentation. You can
+find the scripts in the script folder of the [GitHub
+repo](https://github.com/nationalparkservice/mojn-aspen-rpackage) or if
+you cloned the GitHub repo to your desktop you can open the files from
+there. Download, open, and then run the .qmd file to see the results.
diff --git a/man/fetchAndWrangleMOJNAspen.Rd b/man/loadAndWrangleMOJNAspen.Rd
similarity index 89%
rename from man/fetchAndWrangleMOJNAspen.Rd
rename to man/loadAndWrangleMOJNAspen.Rd
index ffaa9b1..04239d1 100644
--- a/man/fetchAndWrangleMOJNAspen.Rd
+++ b/man/loadAndWrangleMOJNAspen.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/utils.R
-\name{fetchAndWrangleMOJNAspen}
-\alias{fetchAndWrangleMOJNAspen}
+\name{loadAndWrangleMOJNAspen}
+\alias{loadAndWrangleMOJNAspen}
 \title{Fetch aspen data from AGOL and do preliminary data wrangling}
 \usage{
-fetchAndWrangleMOJNAspen(
+loadAndWrangleMOJNAspen(
   aspen_url =
     "https://services1.arcgis.com/fBc8EJBxQRMcHlei/arcgis/rest/services/MOJN_Aspen_Test_Visit_NonSpatial_gdb/FeatureServer",
   site_url =
diff --git a/man/fetchAndWrangleUCBNAspen.Rd b/man/loadAndWrangleUCBNAspen.Rd
similarity index 88%
rename from man/fetchAndWrangleUCBNAspen.Rd
rename to man/loadAndWrangleUCBNAspen.Rd
index 5fdc77a..f9a494c 100644
--- a/man/fetchAndWrangleUCBNAspen.Rd
+++ b/man/loadAndWrangleUCBNAspen.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/utils.R
-\name{fetchAndWrangleUCBNAspen}
-\alias{fetchAndWrangleUCBNAspen}
+\name{loadAndWrangleUCBNAspen}
+\alias{loadAndWrangleUCBNAspen}
 \title{Fetch aspen data from AGOL and do preliminary data wrangling}
 \usage{
-fetchAndWrangleUCBNAspen(
+loadAndWrangleUCBNAspen(
   aspen_url =
     "https://services1.arcgis.com/fBc8EJBxQRMcHlei/arcgis/rest/services/service_4d6343e9204142928351c52c6f1362c5/FeatureServer",
   site_url = NULL,
diff --git a/scripts/visualizations/aspen_visualizations.qmd b/scripts/visualizations/aspen_visualizations.qmd
index f1670d9..59f72c9 100644
--- a/scripts/visualizations/aspen_visualizations.qmd
+++ b/scripts/visualizations/aspen_visualizations.qmd
@@ -195,8 +195,6 @@ fig
 
 # this is finding the density for each plot and then averaging it for the communities
 
-
-# TODO: make it so the different plots in the community are aggregated before density is calculated
 aspenDensity2 <- aspen$data$Observations %>%
   left_join(select(aspen$data$AllSites, any_of(c("Site", "Community", "numberOfPlotsInCommunity"))), by = join_by(Site)) %>%
   # filter for POTR5
@@ -302,41 +300,6 @@ coniferDensityGraph <- coniferDensity %>%
 coniferDensityGraph
 ```
 
-
-```{r}
-# # 4 meter radius of plots
-# # area = pi*r^2
-# # density = #/ha
-# 
-# # TODO: should i aggregate all the different types of conifers per plot before graphing?? - probably yea i think so
-# 
-# coniferDensity <- aspen$data$Observations %>%
-#   left_join(select(aspen$data$AllSites, any_of(c("Site", "Community", "numberOfPlotsInCommunity"))), by = join_by(Site)) %>%
-#   # filter for POTR5
-#   filter(SpeciesCode != "POTR5" & SpeciesCode != "BEUC2") %>%
-#   # Find total live trees in each row
-#   mutate(treeSum = Class1+Class2+Class3+Class4+Class5) %>%
-#   group_by(Site, Community) %>%
-#   summarise(totalTrees = sum(treeSum)) %>%
-#   # Find density for each plot
-#   mutate(density = (totalTrees)/((pi * 4^2)/10000))
-# 
-# 
-# coniferDensityGraph <- coniferDensity %>%
-#   ggplot(aes(x= Community, y = density, fill = Community)) +
-#   geom_violin() +
-#   #geom_col() +
-#   theme_minimal() +
-#   labs(title = "Conifer Density",
-#        x = "Community", y = "Density (#/ha)") + 
-#   theme(plot.title = element_text(hjust = 0.5)) +
-#   theme(axis.text.x = element_text(size = 6, angle = 315, hjust = 0)) +
-#   scale_fill_light() + 
-#   guides(fill="none")
-# 
-# coniferDensityGraph
-```
-
 #### Density of Aspen vs Conifers
 
 ```{r}
diff --git a/scripts/visualizations/aspen_visualizations_ucbn.qmd b/scripts/visualizations/aspen_visualizations_ucbn.qmd
index 712a913..19a8ada 100644
--- a/scripts/visualizations/aspen_visualizations_ucbn.qmd
+++ b/scripts/visualizations/aspen_visualizations_ucbn.qmd
@@ -20,26 +20,6 @@ aspen <- aspen::fetchAndWrangleUCBNAspen()
 ```{r}
 # find number of plots in each watershed
 
-# TODO: update this to Allsites tables once not used sites are removed
-
-# aspen$data$AllSites <- aspen$data$AllSites %>%
-#   filter(Site %in% aspen$data$SiteVisit$Site) %>%
-#       # Find Number of plot in each watershed
-#   group_by(Stratum) %>%
-#   mutate(numberOfPlotsInWatershed = n()) %>%
-#   ungroup() %>%
-#   group_by(Community) %>%
-#   # Find number of plots in each community
-#   mutate(numberOfPlotsInCommunity = n()) %>%
-#   ungroup()
-
-# aspen$data$SiteVisit <- aspen$data$SiteVisit %>%
-#   #filter(Site %in% aspen$data$SiteVisit$Site) %>%
-#       # Find Number of plot in each watershed
-#   group_by(Stand) %>%
-#   mutate(numberOfPlotsInStand = n_distinct(Unique_ID)) %>%
-#   ungroup() 
-
 numberOfPlots <- aspen$data$SiteVisit %>%
   group_by(Stand) %>%
   summarise(numberOfPlotsInStand = n_distinct(Unique_ID))
@@ -174,8 +154,6 @@ fig
 
 # this is finding the density for each plot and then averaging it for the communities
 
-
-# TODO: make it so the different plots in the community are aggregated before density is calculated
 aspenDensity2 <- aspen$data$Observations %>%
   # left_join(select(aspen$data$SiteVisit, any_of(c("numberOfPlotsInStand", "globalid"))), by = c("parentglobalid" = "globalid")) %>%
   left_join(select(numberOfPlots, any_of(c("Stand", "numberOfPlotsInStand"))), by = join_by(Stand)) %>%
diff --git a/tests/testthat/test-aspen_qc.R b/tests/testthat/test-aspen_qc.R
index f3b2e66..5cac498 100644
--- a/tests/testthat/test-aspen_qc.R
+++ b/tests/testthat/test-aspen_qc.R
@@ -6,7 +6,7 @@
   # make fake data and then test number of rows?? - nah probs not
 
 
-data <- fetchAndWrangleMOJNAspen()
+data <- loadAndWrangleMOJNAspen()
 
 test_that("Test that missingTreeQC() works", {
   # Run function
diff --git a/tests/testthat/test-utils.R b/tests/testthat/test-utils.R
index 310a53a..fe8cbc2 100644
--- a/tests/testthat/test-utils.R
+++ b/tests/testthat/test-utils.R
@@ -1,23 +1,25 @@
 
 
-# TODO things to test in fetchAndWrangleAspen - only function in utils rn
+# Things to test in fetchAndWrangleAspen - only function in utils rn
   # check that $data and $meta data exists
   # check that there are the correct number of tables in data and metadata
   # check that each data table has the correct number of columns and that the columns have the right names
   # check that the return type from function is correct
 
-test_that("Test that fetchAndWrangleMOJNAspen() works", {
+data <- loadAndWrangleMOJNAspen()
+
+test_that("Test that loadAndWrangleMOJNAspen() works", {
   # Compare expected and actual names in return object
-  actual_names <- names(fetchAndWrangleMOJNAspen())
+  actual_names <- names(data)
   expected_names <- c("data", "metadata")
   expect_equal(actual_names, expected_names)
 
   # Compare expected and actual names of the aspen data frames
-  actual_cols <- names(fetchAndWrangleMOJNAspen()$data)
+  actual_cols <- names(data$data)
   expected_cols <- c("AllSites", "SiteVisit", "Disturbances", "Observations", "Pests")
   expect_equal(actual_cols, expected_cols)
 
   # Check that the first object in the data list is a database
-  returnType <- fetchAndWrangleMOJNAspen()$data[[1]]
+  returnType <- data$data[[1]]
   expect_s3_class(returnType, "data.frame")
 })