From 3d8c879f5767899986ff2856517aed3f33d11939 Mon Sep 17 00:00:00 2001 From: nickharing Date: Fri, 7 Jun 2024 14:15:43 -0700 Subject: [PATCH 01/10] Update BRI.R This is an update to the master version of BRI.R. I made edits to conform better with tidyverse syntax conventions and packages. --- R/BRI.R | 146 +++++++++++++++----------------------------------------- 1 file changed, 38 insertions(+), 108 deletions(-) diff --git a/R/BRI.R b/R/BRI.R index 9e2a21e..a592c3d 100644 --- a/R/BRI.R +++ b/R/BRI.R @@ -1,20 +1,16 @@ #' Compute the benthic response index (BRI) score and BRI condition category. #' #' @description -#' The BRI is the abundance weighted pollution tolerance score of the organisms present in a benthic sample. The higher -#' the BRI score, the more degraded the benthic community represented by the sample. +#' The BRI is the abundance-weighted pollution tolerance score of the organisms present in a benthic sample. +#' The higher the BRI score, the more degraded the benthic community represented by the sample. #' #' @details -#' The BRI is the abundance weighted pollution tolerance score of the organisms present in a benthic sample. The higher -#' the BRI score, the more degraded the benthic community represented by the sample. -#' #' Two types of data are needed to calculate the BRI: -#' #' (1) the abundance of each species and #' (2) its pollution tolerance score, P. #' -#' The P values are available for most species present in the assemblage. Only species for which P values are avialable -#' are used in the BRI calculations. P values showld be obtained for the appropriate habitat and from the most +#' The P values are available for most species present in the assemblage. Only species for which P values are available +#' are used in the BRI calculations. P values should be obtained for the appropriate habitat and from the most #' up-to-date list available. #' #' The first step in the BRI calculation is to compute the 4th root of the abundance of each taxon in the sample for @@ -27,28 +23,18 @@ #' #' \deqn{ \frac{\sum \left(\sqrt[p]{\textrm{Abundance}} \right) \times P}{\sum \sqrt[p]{\textrm{Abundance}}} } #' -#' The last step is to compare the BRI score to the BRI threshold values in Table 5 to determine the BRI category and +#' The last step is to compare the BRI score to the BRI threshold values to determine the BRI category and #' category score. #' -#' -#' -#' -#' -#' @param BenthicData a data frame with the following headings -#' -#' \strong{\code{StationID}} - an alpha-numeric identifier of the location; -#' -#' \strong{\code{Replicate}} - a numeric identifying the replicate number of samples taken at the location; -#' -#' \strong{\code{SampleDate}} - the date of sample collection; +#' @param benthic_data a data frame with the following headings #' -#' \strong{\code{Latitude}} - latitude in decimal degrees; -#' -#' \strong{\code{Longitude}} - longitude in decimal degrees. +#' \strong{\code{station_id}} - an alpha-numeric identifier of the location; +#' \strong{\code{replicate}} - a numeric identifying the replicate number of samples taken at the location; +#' \strong{\code{sample_date}} - the date of sample collection; +#' \strong{\code{latitude}} - latitude in decimal degrees; +#' \strong{\code{longitude}} - longitude in decimal degrees. #' Make sure there is a negative sign for the Western coordinates; -#' -#' \strong{\code{Species}} - name of the fauna, ideally in SCAMIT ed12 format, do not use sp. or spp., -#' use sp only or just the Genus. If no animals were present in the sample use +#' \strong{\code{species}} - name of the fauna, ideally in SCAMIT ed12 format. If no animals were present in the sample use #' NoOrganismsPresent with 0 abundance; #' #' @usage @@ -58,97 +44,41 @@ #' data(benthic_sampledata) # load sample data #' BRI(benthic_sampledata) # see the output #' -#' @import vegan -#' @import reshape2 -#' @importFrom dplyr left_join filter rename select mutate group_by summarize summarise case_when +#' @importFrom dplyr left_join filter select mutate group_by summarize case_when +#' @importFrom tidyr pivot_longer #' #' @export +BRI <- function(benthic_data) { -BRI <- function(BenthicData, logfile = file.path(getwd(), 'logs', format(Sys.time(), "%Y-%m-%d_%H:%M:%S"), 'log.txt' ), verbose = T) -{ - - # Initialize Logging - init.log(logfile, base.func.name = sys.call(), current.time = Sys.time(), is.base.func = length(sys.calls()) == 1, verbose = verbose) - hyphen.log.prefix <- rep('-', (2 * (length(sys.calls))) - 1) - - writelog('\nBEGIN: BRI function.\n', logfile = logfile, verbose = verbose) - - writelog('*** DATA *** Input to BRI function - BRI-step0.csv', logfile = logfile, verbose = verbose, prefix = hyphen.log.prefix) - writelog(BenthicData, logfile = file.path(dirname(logfile), 'BRI-step0.csv'), filetype = 'csv', verbose = verbose, prefix = hyphen.log.prefix) - - writelog('*** DATA *** sqo.list.new - which gets joined with BenthicData', logfile = logfile, verbose = verbose) - writelog(sqo.list.new, logfile = file.path(dirname(logfile), 'BRI-sqo.list.new.csv'), filetype = 'csv', verbose = verbose, prefix = hyphen.log.prefix) - - out <- BenthicData %>% - left_join(sqo.list.new, by = c('Taxon' = 'TaxonName')) - - writelog('*** DATA *** Benthic data joined with sqo.list.new', logfile = logfile, verbose = verbose) - writelog(out, logfile = file.path(dirname(logfile), 'BRI-step1.csv'), filetype = 'csv', verbose = verbose, prefix = hyphen.log.prefix) - - out <- out %>% - # I assume that the next line is something they had in there as a method of removing duplicates - # for this reason, this next line will likely be eliminated. - # They grouped by all the columns that were selected (In query BRI - 1) - # Instead, if need be we can use something from dplyr that deals with duplicates - # I actually found that it didn't appear to make a difference - filter(!is.na(ToleranceScore)) %>% - #rename(Stratum) %>% - select(Stratum, StationID, SampleDate, Replicate, Taxon, Abundance, ToleranceScore) - - writelog('*** DATA *** Remove NA tolerance scores and select only the columns: Stratum, StationID, SampleDate, Replicate, Taxon, Abundance, ToleranceScore', logfile = logfile, verbose = verbose, prefix = hyphen.log.prefix) - writelog(out, logfile = file.path(dirname(logfile), 'BRI-step2.csv'), filetype = 'csv', verbose = verbose, prefix = hyphen.log.prefix) - - out <- out %>% - # End of BRI - 2 query. Begin BRI - 3 query + out <- benthic_data %>% + left_join(sqo.list.new, by = c("taxon" = "taxon_name")) %>% + filter(!is.na(tolerance_score)) %>% + select(stratum, station_id, sample_date, replicate, taxon, abundance, tolerance_score) %>% mutate( - fourthroot_abun = Abundance ** 0.25, - tolerance_score = fourthroot_abun * ToleranceScore - ) - - writelog('*** DATA *** Calculate fourthroot of abundance - also multiply by tolerancs score - BRI-step3.csv', logfile = logfile, verbose = verbose, prefix = hyphen.log.prefix) - writelog(out, logfile = file.path(dirname(logfile), 'BRI-step3.csv'), filetype = 'csv', verbose = verbose, prefix = hyphen.log.prefix) - - writelog('Next get the Score - group by Stratum, StationID, SampleDate, Replicate and do: (sum of the tolerance scores)/(sum of fourthroot abundances)', logfile = logfile, verbose = verbose, prefix = hyphen.log.prefix) - writelog('Then Get Categories (CASQO Technical Manual 3rd Edition Page 72 - Table 4.24)', logfile = logfile, verbose = verbose, prefix = hyphen.log.prefix) - writelog('---- < 39.96 is Reference', logfile = logfile, verbose = verbose, prefix = hyphen.log.prefix) - writelog('---- >=39.96 and <49.15 is Low', logfile = logfile, verbose = verbose, prefix = hyphen.log.prefix) - writelog('---- >=49.15 and <73.27 is Reference', logfile = logfile, verbose = verbose, prefix = hyphen.log.prefix) - writelog('---- >=73.27 is High', logfile = logfile, verbose = verbose, prefix = hyphen.log.prefix) - - out <- out %>% - # End of BRI - 3. Begin BRI - 4 - group_by( - Stratum, StationID, SampleDate, Replicate + fourthroot_abun = abundance ^ 0.25, + weighted_score = fourthroot_abun * tolerance_score ) %>% + group_by(stratum, station_id, sample_date, replicate) %>% summarize( - Score = sum(tolerance_score, na.rm = T) / sum(fourthroot_abun, na.rm = T) + score = sum(weighted_score, na.rm = TRUE) / sum(fourthroot_abun, na.rm = TRUE), + .groups = 'drop' ) %>% - # Output the BRI category given the BRI score and the thresholds for Southern California Marine Bays - # CASQO Technical Manual 3rd Edition Page 72 - Table 4.24 - mutate( - Category = case_when( (Score < 39.96) ~ "Reference", - (Score >= 39.96 & Score < 49.15) ~ "Low Disturbance", - (Score >= 49.15 & Score < 73.27) ~ "Moderate Disturbance", - (Score >= 73.27) ~ "High Disturbance" - )) %>% - # Output the BRI category score given the category for thresholds for Southern CA Marine Bays mutate( - `Category Score` = case_when( (Category == "Reference") ~ 1, - (Category == "Low Disturbance") ~ 2, - (Category == "Moderate Disturbance") ~ 3, - (Category == "High Disturbance") ~ 4 ) - ) %>% - dplyr::mutate(Index = "BRI") - - - writelog('*** DATA *** Final BRI dataframe: BRI-final.csv', logfile = logfile, verbose = verbose, prefix = hyphen.log.prefix) - writelog(out, logfile = file.path(dirname(logfile), 'BRI-final.csv'), filetype = 'csv', verbose = verbose, prefix = hyphen.log.prefix) - - - writelog('\nEND: BRI function.\n', logfile = logfile, verbose = verbose) + category = case_when( + score < 39.96 ~ "Reference", + score >= 39.96 & score < 49.15 ~ "Low Disturbance", + score >= 49.15 & score < 73.27 ~ "Moderate Disturbance", + score >= 73.27 ~ "High Disturbance" + ), + category_score = case_when( + category == "Reference" ~ 1, + category == "Low Disturbance" ~ 2, + category == "Moderate Disturbance" ~ 3, + category == "High Disturbance" ~ 4 + ), + index = "BRI" + ) return(out) } - - From d3534b46c83445adf568a0496dc14fdacdf0da8e Mon Sep 17 00:00:00 2001 From: Robert Butler Date: Wed, 5 Jun 2024 00:42:15 -0700 Subject: [PATCH 02/10] overhauling logging mechanism for chemistry --- R/SQOUnified.R | 8 +- R/chemistry.R | 1527 ++++++++++++++++++++++++++++++++++++++++-------- R/logutils.R | 46 +- 3 files changed, 1308 insertions(+), 273 deletions(-) diff --git a/R/SQOUnified.R b/R/SQOUnified.R index 60e07fc..ad84d56 100644 --- a/R/SQOUnified.R +++ b/R/SQOUnified.R @@ -72,7 +72,13 @@ SQOUnified <- function(benthic = NULL, chem = NULL, tox = NULL, logfile = file.p # ---- Chemistry ---- if (!is.null(chem)) { - chem <- chem.sqo(chem, logfile = file.path( dirname(logfile), 'Chemistry', 'log.txt' ), verbose = verbose) %>% + + chemlogfile <- file.path( dirname(logfile), 'Chemistry', 'chemlog.Rmd' ) + chemlibs <- c('tidyverse', 'DT', 'knitr', 'rmarkdown', 'SQOUnified') + + init.log(chemlogfile, base.func.name = sys.call(), type = 'RMarkdown', current.time = Sys.time(), is.base.func = length(sys.calls()) == 1, verbose = verbose, libraries = chemlibs) + + chem <- chem.sqo(chem, logfile = chemlogfile, verbose = verbose) %>% mutate(LOE = 'Chemistry') %>% select(StationID, LOE, Index, Score, Category, `Category Score`) } else { diff --git a/R/chemistry.R b/R/chemistry.R index d02f487..72554c4 100644 --- a/R/chemistry.R +++ b/R/chemistry.R @@ -1,4 +1,4 @@ -# ------------ LRM -------------- +# ------------ LRM (Logistic Regression Model)-------------- #' Logistic Regression Model Score #' #' This function calculates the Logistic Regression Model score @@ -26,74 +26,225 @@ #' #' @import dplyr #' @export -LRM <- function(chemdata, preprocessed = F, logfile = file.path(getwd(), 'logs', paste0(format(Sys.time(), "%Y-%m-%d_%H:%M:%S"), '-log.txt') ), verbose = T) { +LRM <- function(chemdata.lrm.input, preprocessed = F, logfile = file.path(getwd(), 'logs', paste0(format(Sys.time(), "%Y-%m-%d_%H:%M:%S"), '-chemlog.Rmd') ), verbose = T) { "lrm_table" # Initialize Logging init.log(logfile, base.func.name = sys.call(), current.time = Sys.time(), is.base.func = length(sys.calls()) == 1, verbose = verbose) hyphen.log.prefix <- rep('-', (2 * (length(sys.calls))) - 1) - writelog("\nBEGIN Chem LRM Function\n", logfile = logfile, verbose = verbose) + writelog("\n\n## Chem CA LRM Function\n", logfile = logfile, verbose = verbose) + + # ---- Save the raw input to an RData file (for the sake of those who want the auditing logs) ---- + rawinput.filename <- 'lrm.input.RData' + if (verbose) { + save(chemdata.lrm.input, file = file.path( dirname(logfile), rawinput.filename )) + } + + writelog( + "\n### Initial input to LRM:", + logfile = logfile, + code = paste0("load('", rawinput.filename, "')"), + data = chemdata.lrm.input, + verbose = verbose + ) + create_download_link(data = chemdata.lrm.input, logfile = logfile, filename = 'LRM_InitialInputData.csv', linktext = 'Download Initial Input to Chem LRM Function', verbose = verbose) + - writelog("*** DATA *** Chem data (Initial input to LRM function) may be found in LRM_initial_input_data.csv", logfile = logfile, verbose = verbose, prefix = hyphen.log.prefix) - writelog(chemdata, logfile = file.path(dirname(logfile), 'LRM_initial_input_data-step0.csv'), filetype = 'csv', verbose = verbose) - # get it in a usable format + + # ---- Make the call to the Preprocessing function (If not already preprocessed) ---- if (!preprocessed) { - writelog("Input to CSI function did not come preprocessed.", logfile = logfile, verbose = verbose, prefix = hyphen.log.prefix) - writelog("\nPrepping/Preprocessing chem data...", logfile = logfile, verbose = verbose, prefix = hyphen.log.prefix) - chemdata <- chemdata_prep(chemdata, logfile = logfile, verbose = verbose) - writelog("\nDone prepping chem data...", logfile = logfile, verbose = verbose, prefix = hyphen.log.prefix) + # Write to the log file + writelog( + "\n#### Preprocessing chemistry data (Details within chemdata_prep to be shown later as well):\n", + logfile = logfile, + verbose = verbose + ) + + # Actually call the function + chemdata.lrm.input <- chemdata_prep(chemdata.lrm.input, logfile = logfile, verbose = verbose) + + # Create code block and download link to the preprocessed data + writelog( + "\n#### Chemdata Pre processing function is finished executing - Here is its final output along with a code block (for R Studio users):", + logfile = logfile, + code = 'chemdata.lrm.input <- chemdata_prep(chemdata.lrm.input, verbose = FALSE)', + verbose = verbose + ) + create_download_link(data = chemdata.lrm.input, logfile = logfile, filename = 'LRM_PreProcessedInput.csv', linktext = 'Download Preprocessed Input to LRM Function (after calling chemdata_prep)', verbose = verbose) - writelog("*** DATA *** Chem data (Input to LRM function after preprocessing) may be found in LRM_preprocessed_input_data-step0.1.csv", logfile = logfile, verbose = verbose, prefix = hyphen.log.prefix) - writelog(chemdata, logfile = file.path(dirname(logfile), 'LRM_preprocessed_input_data-step0.1.csv'), filetype = 'csv', verbose = verbose) } - # Take the Log10 of the chemistry concentration. - writelog("Take the Log10 of the chemistry concentration.", logfile = logfile, verbose = verbose, prefix = hyphen.log.prefix) - chemdata <- chemdata %>% + # ---- Display the LRM Table ---- + writelog( + "### LRM table by AnalyteName (Table 3.5 on page 39 of Technical Manual)", + data = lrm_table, + logfile = logfile, + verbose = verbose, + pageLength = 12 + ) + + + # ---- CA LRM Step 0 - Take Log10 of the results ---- + + # Write to log file + writelog("\n### Step 0: Take the Log10 of the chemistry concentration.", logfile = logfile, verbose = verbose) + + # Actually execute the code + chemdata.log10 <- chemdata.lrm.input %>% mutate( logResult = log10(Result) ) - writelog("*** DATA *** Chem Data for LRM function AFTER taking Log base 10 of Result Column found in LRM_chemdata_log10-step1.csv", logfile = logfile, verbose = verbose, prefix = hyphen.log.prefix) - writelog(chemdata, logfile = file.path(dirname(logfile), 'LRM_chemdata_log10-step1.csv'), filetype = 'csv', verbose = verbose) - - # Combine LRM values with data based on the compound. Exclude compounds not in lrm. - - writelog("Manipulate the LRM chem data per Technial Manual page 37-40", logfile = logfile, verbose = verbose, prefix = hyphen.log.prefix) - writelog("-- Join with the LRM table by AnalyteName (Table 3.5 on page 39 of Technical Manual)", logfile = logfile, verbose = verbose, prefix = hyphen.log.prefix) - - writelog("*** DATA *** LRM table may be found in LRM_Table.csv", logfile = logfile, verbose = verbose, prefix = hyphen.log.prefix) - writelog(lrm_table, logfile = file.path(dirname(logfile), 'LRM_Table.csv'), filetype = 'csv', verbose = verbose) - - writelog("-- p = (exp(B0 + B1 * logResult) / (1 + exp(B0 + B1 * logResult))) ---> Round to 2 decimal places", logfile = logfile, verbose = verbose, prefix = hyphen.log.prefix) - writelog("-- Group by StationID (this function and R package overall assumes you are feeding it data for one sediment sample per stationid)", logfile = logfile, verbose = verbose, prefix = hyphen.log.prefix) - writelog("-- take max value of the variable p within each grouping (grouped by StationID)", logfile = logfile, verbose = verbose, prefix = hyphen.log.prefix) - writelog("-- Sum the weighted scores and the weights - then take the sum of the weighted scores divided by the sum of the weights. - This is the CSI Score", logfile = logfile, verbose = verbose, prefix = hyphen.log.prefix) - writelog("-- Assign LRM Categories based on thresholds defined in table 3.6 of Technical Manual page 40", logfile = logfile, verbose = verbose, prefix = hyphen.log.prefix) - writelog("---- Below 0.33 is 'Minimal'", logfile = logfile, verbose = verbose, prefix = hyphen.log.prefix) - writelog("---- From 0.33 to 0.49 is 'Low' (upper and lower bounds are inclusive)", logfile = logfile, verbose = verbose, prefix = hyphen.log.prefix) - writelog("---- From 0.49 to 0.66 is 'Moderate' (lower bound is exclusive, upper bound is inclusive)", logfile = logfile, verbose = verbose, prefix = hyphen.log.prefix) - writelog("---- Above 0.66 is 'High'", logfile = logfile, verbose = verbose, prefix = hyphen.log.prefix) - - # Page 37 of Technical Manual - chemdata_lrm <- lrm_table %>% - left_join(chemdata, by = 'AnalyteName') %>% + + # Write to code portion of the logs + writelog( + "", + code = ' + chemdata.log10 <- chemdata.lrm.input %>% + mutate( + logResult = log10(Result) + ) + ', + data = chemdata.log10, + logfile = logfile, + verbose = verbose + ) + # Serve it up for download + create_download_link(data = chemdata.log10, logfile = logfile, filename = 'LRM_Step0 (Log Transform).csv', linktext = 'Download Log Transformed Input to LRM Function (Step 0)', verbose = verbose) + + + + + + + writelog("\n### Manipulate the LRM chem data per Technical Manual page 37-40", logfile = logfile, verbose = verbose) + + + # -------- Step 1 - Join LRM Table with the log transformed chemistry results data -------- + # Write to log + writelog("\n#### Step 1", logfile = logfile, verbose = verbose) + writelog("\n##### Join with the LRM table by AnalyteName (Table 3.5 on page 39 of Technical Manual)", logfile = logfile, verbose = verbose) + + # Write the table 3.6 (Since it is called table 3.6 in the SQO Technical Manual 3rd Edition) to the log file + table3.6 <- " + | Category | Range | Category Score | + |-------------------|-----------------|----------------| + | Minimal Exposure | < 0.33 | 1 | + | Low Exposure | ≥ 0.33 - ≤ 0.49 | 2 | + | Moderate Exposure | > 0.49 - ≤ 0.66 | 3 | + | High Exposure | > 0.66 | 4 | + " + writelog("##### Here is the table:", logfile = logfile, verbose = verbose) + writelog(table3.6, logfile = logfile, verbose = verbose) + + # Actually execute the code + chemdata_lrm1 <- lrm_table %>% + left_join(chemdata.log10, by = 'AnalyteName') + + # Write to code portion of the logs + writelog( + "", + code = ' + chemdata_lrm1 <- lrm_table %>% + left_join(chemdata.log10, by = \'AnalyteName\') + ', + data = chemdata_lrm1, + logfile = logfile, + verbose = verbose + ) + # Serve it up for download + create_download_link(data = chemdata_lrm1, logfile = logfile, filename = 'LRM_Step1.csv', linktext = 'Download Step 1 LRM Data', verbose = verbose) + + + + + # -------- Step 2 - Get P Values for each analyte of each station -------- + # Write to log + writelog("\n#### Step 2", logfile = logfile, verbose = verbose) + writelog("\n##### p = (exp(B0 + B1 * logResult) / (1 + exp(B0 + B1 * logResult))) ---> Round to 2 decimal places", logfile = logfile, verbose = verbose) + + # Actually execute the code + chemdata_lrm2 <- chemdata_lrm1 %>% mutate( - # page 38 of Technical Manual + # page 38 of Technical Manual 3rd Edition June 2021 p = (exp(B0 + B1 * logResult) / (1 + exp(B0 + B1 * logResult))) %>% round(2) - ) %>% + ) + + # Write to code portion of the logs + writelog( + "", + code = ' + chemdata_lrm2 <- chemdata_lrm1 %>% + mutate( + # page 38 of Technical Manual 3rd Edition June 2021 + p = (exp(B0 + B1 * logResult) / (1 + exp(B0 + B1 * logResult))) %>% round(2) + ) + ', + data = chemdata_lrm2, + logfile = logfile, + verbose = verbose + ) + # Serve it up for download + create_download_link(data = chemdata_lrm2, logfile = logfile, filename = 'LRM_Step2.csv', linktext = 'Download Step 2 LRM Data', verbose = verbose) + + + + # -------- Step 3 - Get Max P Value -------- + writelog("\n#### Step 3", logfile = logfile, verbose = verbose) + writelog("\n##### Group by StationID (this function and R package overall assumes you are feeding it data for one sediment sample per stationid)", logfile = logfile, verbose = verbose) + writelog("\n##### take max value of the variable p within each grouping (grouped by StationID)", logfile = logfile, verbose = verbose) + + # Actually execute the code + chemdata_lrm3 <- chemdata_lrm2 %>% # Page 39 of Technical Manual + # Get the max of the "p" values group_by(StationID) %>% summarize( Score = if_else( all(is.na(p)), NA_real_, suppressWarnings(max(p, na.rm = T)) ) ) %>% - ungroup() %>% + ungroup() + + + # Write to code portion of the logs + writelog( + "", + code = ' + chemdata_lrm3 <- chemdata_lrm2 %>% + # Page 39 of Technical Manual + # Get the max of the "p" values + group_by(StationID) %>% + summarize( + Score = if_else( + all(is.na(p)), NA_real_, suppressWarnings(max(p, na.rm = T)) + ) + ) %>% + ungroup() + ', + data = chemdata_lrm3, + logfile = logfile, + verbose = verbose + ) + # Serve it up for download + create_download_link(data = chemdata_lrm3, logfile = logfile, filename = 'LRM_Step3.csv', linktext = 'Download Step 3 LRM Data', verbose = verbose) + + + + + + # -------- Step 4 - Assign the category and category score -------- + + # Write to the logs + writelog("\n#### Step 4", logfile = logfile, verbose = verbose) + writelog("\n##### Assign LRM Categories based on thresholds defined in table 3.6 of Technical Manual page 40", logfile = logfile, verbose = verbose) + + # Actually execute the code + chemdata_lrm.final <- chemdata_lrm3 %>% mutate( - # Page 34 of Technical Manual + # Page 40 of Technical Manual (3rd edition June 2021) `Category Score` = case_when( Score < 0.33 ~ 1, Score >= 0.33 & Score <= 0.49 ~ 2, @@ -116,13 +267,47 @@ LRM <- function(chemdata, preprocessed = F, logfile = file.path(getwd(), 'logs', StationID, Index, Score, Category, `Category Score` ) - writelog("*** DATA *** Final LRM scores dataframe in LRM_scores-final.csv", logfile = logfile, verbose = verbose, prefix = hyphen.log.prefix) - writelog(chemdata_lrm, logfile = file.path(dirname(logfile), 'LRM_scores-final.csv'), filetype = 'csv', verbose = verbose) - writelog("\nEND Chem LRM Function\n", logfile = logfile, verbose = verbose) - writelog("----------------------------------------------------------------------------------------------------\n", logfile = logfile, verbose = verbose) + # Write to code portion of the logs + writelog( + "", + code = ' + chemdata_lrm.final <- chemdata_lrm3 %>% + mutate( + # Page 40 of Technical Manual (3rd edition June 2021) + `Category Score` = case_when( + Score < 0.33 ~ 1, + Score >= 0.33 & Score <= 0.49 ~ 2, + Score > 0.49 & Score <= 0.66 ~ 3, + Score > 0.66 ~ 4, + TRUE ~ NA_real_ + ), + Category = case_when( + `Category Score` == 1 ~ "Minimal Exposure", + `Category Score` == 2 ~ "Low Exposure", + `Category Score` == 3 ~ "Moderate Exposure", + `Category Score` == 4 ~ "High Exposure", + TRUE ~ NA_character_ + ) + ) %>% + mutate( + Index = \'LRM\' + ) %>% + select( + StationID, Index, Score, Category, `Category Score` + ) + ', + data = chemdata_lrm.final, + logfile = logfile, + verbose = verbose + ) + # Serve it up for download + create_download_link(data = chemdata_lrm.final, logfile = logfile, filename = 'LRM_Final.csv', linktext = 'Download Final LRM Data (Within LRM Function)', verbose = verbose) + - return(chemdata_lrm) + writelog("##### END Chem LRM Function\n", logfile = logfile, verbose = verbose) + + return(chemdata_lrm.final) # uncomment to make the above block a function again } @@ -167,61 +352,148 @@ LRM <- function(chemdata, preprocessed = F, logfile = file.path(getwd(), 'logs', # HPAH - 1 # LPAH - 1 # All the rest - 2 -CSI <- function(chemdata, preprocessed = F, logfile = file.path(getwd(), 'logs', paste0(format(Sys.time(), "%Y-%m-%d_%H:%M:%S"), '-log.txt') ), verbose = T) { +CSI <- function(chemdata.csi.input, preprocessed = F, logfile = file.path(getwd(), 'logs', paste0(format(Sys.time(), "%Y-%m-%d_%H:%M:%S"), '-chemlog.Rmd') ), verbose = T) { "csi_weight" # Initialize Logging init.log(logfile, base.func.name = sys.call(), current.time = Sys.time(), is.base.func = length(sys.calls()) == 1, verbose = verbose) hyphen.log.prefix <- rep('-', (2 * (length(sys.calls))) - 1) - writelog("\nBEGIN Chem CSI Function\n", logfile = logfile, verbose = verbose) + writelog("\n\n## Chem CSI Function\n", logfile = logfile, verbose = verbose) + + + + # ---- Save the raw input to an RData file (for the sake of those who want the auditing logs) ---- + rawinput.filename <- 'csi.input.RData' + if (verbose) { + save(chemdata.csi.input, file = file.path( dirname(logfile), rawinput.filename )) + } + + writelog( + "\n### Initial input to CSI:", + logfile = logfile, + code = paste0("load('", rawinput.filename, "')"), + data = chemdata.csi.input, + verbose = verbose + ) + create_download_link(data = chemdata.csi.input, logfile = logfile, filename = 'CSI_InitialInputData.csv', linktext = 'Download Initial Input to Chem CSI Function', verbose = verbose) + - writelog("*** DATA *** Chem data (Initial input to CSI function) may be found in csi_initial_input_data-step0.csv", logfile = logfile, verbose = verbose, prefix = hyphen.log.prefix) - writelog(chemdata, logfile = file.path(dirname(logfile), 'CSI_initial_input_data-step0.csv'), filetype = 'csv', verbose = verbose) - # get it in a usable format + # ---- Make the call to the Preprocessing function (If not already preprocessed) ---- if (!preprocessed) { - writelog("Input to CSI function did not come preprocessed.", logfile = logfile, verbose = verbose, prefix = hyphen.log.prefix) - writelog("\nPrepping/Preprocessing chem data...", logfile = logfile, verbose = verbose, prefix = hyphen.log.prefix) - chemdata <- chemdata_prep(chemdata, logfile = logfile, verbose = verbose) - writelog("\nDone prepping chem data...", logfile = logfile, verbose = verbose, prefix = hyphen.log.prefix) + # Write to the log file + writelog( + "\n#### Preprocessing chemistry data (Details within chemdata_prep to be shown later as well):\n", + logfile = logfile, + verbose = verbose + ) - writelog("*** DATA *** Chem data (Input to CSI function after preprocessing) may be found in LRM_preprocessed_input_data-step0.1.csv", logfile = logfile, verbose = verbose, prefix = hyphen.log.prefix) - writelog(chemdata, logfile = file.path(dirname(logfile), 'CSI_preprocessed_input_data-step0.1.csv'), filetype = 'csv', verbose = verbose) + # Actually call the function + chemdata.csi.input <- chemdata_prep(chemdata.csi.input, logfile = logfile, verbose = verbose) + + # Create code block and download link to the preprocessed data + writelog( + "\n#### Chemdata Pre processing function is finished executing - Here is its final output along with a code block (for R Studio users):", + logfile = logfile, + code = 'chemdata.csi.input <- chemdata_prep(chemdata.csi.input, verbose = FALSE)', + verbose = verbose + ) + create_download_link(data = chemdata.csi.input, logfile = logfile, filename = 'CSI_PreProcessedInput.csv', linktext = 'Download Preprocessed Input to CSI Function (after calling chemdata_prep)', verbose = verbose) } - # Combine CSI Weight values with data based on the compound. Exclude compounds not in CSI calculation. - writelog("Combine CSI Weight values with data based on the compound. Exclude compounds not in CSI calculation.", logfile = logfile, verbose = verbose, prefix = hyphen.log.prefix) - writelog("*** DATA *** CSI Weight values may be found in CSI_weights.csv", logfile = logfile, verbose = verbose, prefix = hyphen.log.prefix) - writelog(csi_weight, logfile = file.path(dirname(logfile), 'CSI_weights.csv'), filetype = 'csv', verbose = verbose) - - chemdata_csi <- csi_weight %>% left_join(chemdata, by = "AnalyteName") - - writelog("*** DATA *** Chem data (Input to CSI function) combined with CSI Weight values may be found in CSI_input_data_with_weights-step1.csv", logfile = logfile, verbose = verbose, prefix = hyphen.log.prefix) - writelog(chemdata_csi, logfile = file.path(dirname(logfile), 'CSI_input_data_with_weights-step1.csv'), filetype = 'csv', verbose = verbose) - - writelog("Manipulate the CSI chem data per Technial Manual page 40-42", logfile = logfile, verbose = verbose, prefix = hyphen.log.prefix) - writelog("-- Make the CSI weights NA where the Result value is NA", logfile = logfile, verbose = verbose, prefix = hyphen.log.prefix) - writelog("-- Assign exposure score according to appropriate category (see Table 3.7 on page 40 of the Technical Manual)", logfile = logfile, verbose = verbose, prefix = hyphen.log.prefix) - writelog("-- Assign weighted exposure score (exposure_score x weight)", logfile = logfile, verbose = verbose, prefix = hyphen.log.prefix) - writelog("-- Group by StationID (this function and R package overall assumes you are feeding it data for one sediment sample per stationid)", logfile = logfile, verbose = verbose, prefix = hyphen.log.prefix) - writelog("-- Sum the weighted scores and the weights - then take the sum of the weighted scores divided by the sum of the weights. - This is the CSI Score", logfile = logfile, verbose = verbose, prefix = hyphen.log.prefix) - writelog("-- Assign CSI Categories based on thresholds defined in table 3.9 of Technical Manual page 42", logfile = logfile, verbose = verbose, prefix = hyphen.log.prefix) - writelog("---- Below 1.69 is 'Minimal'", logfile = logfile, verbose = verbose, prefix = hyphen.log.prefix) - writelog("---- From 1.69 to 2.33 is 'Low' (upper and lower bounds are inclusive)", logfile = logfile, verbose = verbose, prefix = hyphen.log.prefix) - writelog("---- From 2.33 to 2.99 is 'Moderate' (lower bound is exclusive, upper bound is inclusive)", logfile = logfile, verbose = verbose, prefix = hyphen.log.prefix) - writelog("---- Above 2.99 is 'High'", logfile = logfile, verbose = verbose, prefix = hyphen.log.prefix) - - chemdata_csi <- chemdata_csi %>% + # ---- CSI Weight DataFrame - Do not include a download option ---- + writelog( + "\n#### This is the CSI Weight DataFrame (Based on table 3.7 and 3.8 on page 40-41 of the manual - 3rd edition June 2021):", + logfile = logfile, + verbose = verbose + ) + writelog("\n##### (Based on table 3.7 and 3.8 on page 40-41 of the manual - 3rd edition June 2021):", logfile = logfile, verbose = verbose) + writelog("\n###### (the \"BDC\" columns represent the cutoff points to separate the exposure score bins):", logfile = logfile, verbose = verbose) + writelog( + "", + data = csi_weight, + logfile = logfile, + verbose = verbose, + pageLength = 12 + ) + + # Here we begin to manipulate the CSI chem data per Technical Manual page 40-42 + writelog("\n### Manipulate the CSI chem data per Technical Manual page 40-42\n", logfile = logfile, verbose = verbose) + + + + # ---- Step 0. Combine CSI Weight values with data based on the compound. Exclude compounds not in CSI calculation. ---- + + # Write to the log file + writelog( + "\n#### Combine CSI Weight values with data based on the compound. Exclude compounds not in CSI calculation.", + logfile = logfile, + verbose = verbose + ) + + # Actually execute the code + chemdata_csi <- csi_weight %>% left_join(chemdata.csi.input, by = "AnalyteName") + + # Write code portion to the log file + writelog( + "", + code = 'chemdata_csi <- csi_weight %>% left_join(chemdata.csi.input, by = "AnalyteName")', + data = chemdata_csi, + logfile = logfile, + verbose = verbose + ) + + # Serve it up for download + create_download_link(data = chemdata_csi, logfile = logfile, filename = 'CSI_Step0.csv', linktext = 'Download CSI Step0', verbose = verbose) + + + + + # ---- CSI Calclulation Step Description ---- + writelog("\n\n#### Calculate CSI\n", logfile = logfile, verbose = verbose) + writelog("\n##### Step 1:\n", logfile = logfile, verbose = verbose) + writelog("- Step 1a: Make the CSI weights NA where the Result value is NA\n", logfile = logfile, verbose = verbose) + writelog("- Step 1b: Assign exposure score according to appropriate category (see Table 3.7 on page 40 of the Technical Manual 3rd edition)\n", logfile = logfile, verbose = verbose) + writelog("- Step 1c: Assign weighted exposure score (exposure_score x weight)\n", logfile = logfile, verbose = verbose) + writelog("\n##### Step 2:\n", logfile = logfile, verbose = verbose) + writelog("- Step 2a: Group by StationID (this function and R package overall assumes you are feeding it data for one sediment sample per stationid)\n", logfile = logfile, verbose = verbose) + writelog("- Step 2b: Sum the weighted scores and the weights - then take the sum of the weighted scores divided by the sum of the weights. - This is the CSI Score\n", logfile = logfile, verbose = verbose) + writelog("- Step 2c: Assign CSI Categories based on thresholds defined in table 3.9 of Technical Manual page 42 (3rd edition)\n", logfile = logfile, verbose = verbose) + table3.9 <- " + | Category | Range | Category Score | + |-------------------|-----------------|----------------| + | Minimal Exposure | < 1.69 | 1 | + | Low Exposure | ≥ 1.69 - ≤ 2.33 | 2 | + | Moderate Exposure | > 2.33 - ≤ 2.99 | 3 | + | High Exposure | > 2.99 | 4 | + " + writelog(table3.9, logfile = logfile, verbose = verbose) + writelog("\n##### Step 3:\n", logfile = logfile, verbose = verbose) + writelog("- Step 3a: Assign Numerical Category Score and SQO Category Name", logfile = logfile, verbose = verbose) + + + + + + + + # ---- Calculate CSI Step 1 ---- + + # Write to the log file + writelog("\n##### Execute Step 1:\n", logfile = logfile, verbose = verbose) + + # Actually execute the code + chemdata_csi1 <- chemdata_csi %>% mutate( weight = case_when( - # we needed this because the weights are only summed for mean CSI if the Result value exists + # We needed this because the weights are only summed for mean CSI if the Result value exists # So we have to make it NA_real_ where the Result is NA !is.na(Result) ~ weight, TRUE ~ NA_real_ ), - # Page 40 of Technical Manual + # Page 40 of Technical Manual (3rd edition June 2021) exposure_score = case_when( Result <= BDC1 ~ 1, (Result > BDC1) & (Result <= BDC2) ~ 2, @@ -229,13 +501,52 @@ CSI <- function(chemdata, preprocessed = F, logfile = file.path(getwd(), 'logs', Result > BDC3 ~ 4, TRUE ~ NA_real_ ), - # Page 41 of Technical Manual - weighted_score = exposure_score * weight # manual calls this the weighted score (page 41) - ) %>% + # Page 41 of Technical Manual (3rd edition June 2021) + weighted_score = exposure_score * weight # manual calls this the weighted score (page 41) (3rd edition June 2021) + ) + + # Write code portion of the logs + writelog( + "", + code = ' + chemdata_csi1 <- chemdata_csi %>% + mutate( + weight = case_when( + # We needed this because the weights are only summed for mean CSI if the Result value exists + # So we have to make it NA_real_ where the Result is NA + !is.na(Result) ~ weight, + TRUE ~ NA_real_ + ), + # Page 40 of Technical Manual (3rd edition June 2021) + exposure_score = case_when( + Result <= BDC1 ~ 1, + (Result > BDC1) & (Result <= BDC2) ~ 2, + (Result > BDC2) & (Result <= BDC3) ~ 3, + Result > BDC3 ~ 4, + TRUE ~ NA_real_ + ), + # Page 41 of Technical Manual (3rd edition June 2021) + weighted_score = exposure_score * weight # manual calls this the weighted score (page 41) (3rd edition June 2021) + ) + ', + data = chemdata_csi1, + logfile = logfile, + verbose = verbose + ) + # Serve it up for download + create_download_link(data = chemdata_csi1, logfile = logfile, filename = 'CSI_Step1.csv', linktext = 'Download CSI Step1', verbose = verbose) + + + # ---- Calculate CSI Step 2 ---- + + # Write to the log file + writelog("\n##### Execute Step 2:\n", logfile = logfile, verbose = verbose) + + # Actually execute the code + chemdata_csi2 <- chemdata_csi1 %>% group_by( - # We need to also group by SampleDate. - # Current SampleData does not have a date in it - StationID #,SampleDate ? + # We would need to also group by SampleDate - although typically calculating SQO for Bight data, we dont need a sampledate + StationID ) %>% summarize( # Page 41 of Technical Manual (Chart) @@ -243,10 +554,44 @@ CSI <- function(chemdata, preprocessed = F, logfile = file.path(getwd(), 'logs', weight = sum(weight, na.rm = T), Score = round(weighted_score_sum / weight, 2) ) %>% - ungroup() %>% + ungroup() + + # Write code portion of the logs + writelog( + "", + code = ' + chemdata_csi2 <- chemdata_csi1 %>% + group_by( + # We would need to also group by SampleDate - although typically calculating SQO for Bight data, we dont need a sampledate + StationID + ) %>% + summarize( + # Page 41 of Technical Manual 3rd edition (Chart) + weighted_score_sum = sum(weighted_score, na.rm = T), + weight = sum(weight, na.rm = T), + Score = round(weighted_score_sum / weight, 2) + ) %>% + ungroup() + ', + data = chemdata_csi2, + logfile = logfile, + verbose = verbose + ) + # Serve it up for download + create_download_link(data = chemdata_csi2, logfile = logfile, filename = 'CSI_Step2.csv', linktext = 'Download CSI Step2', verbose = verbose) + + + # ---- Calculate CSI Step 3 ---- + # write to the logs + writelog("\n##### Execute Step 3:\n", logfile = logfile, verbose = verbose) + + # Actually execute the code + chemdata_csi.final <- chemdata_csi2 %>% + # Remove unnecessary columns - weighted_score_sum and weight select(-c(weighted_score_sum, weight)) %>% + + # Assign category based on the bins specified in page 42 of Technical Manual (3rd edition from June 2021) mutate( - # Page 42 of Technical Manual `Category Score` = case_when( Score < 1.69 ~ 1, (Score >= 1.69) & (Score <= 2.33) ~ 2, @@ -262,20 +607,61 @@ CSI <- function(chemdata, preprocessed = F, logfile = file.path(getwd(), 'logs', TRUE ~ NA_character_ ) ) %>% + + # To identify that the score is for the Chemical Score Index mutate( Index = "CSI" ) %>% + + # Ordering the columns select( StationID, Index, Score, Category, `Category Score` ) - writelog("*** DATA *** Final CSI scores dataframe in CSI_scores-final.csv", logfile = logfile, verbose = verbose, prefix = hyphen.log.prefix) - writelog(chemdata_csi, logfile = file.path(dirname(logfile), 'CSI_scores-final.csv'), filetype = 'csv', verbose = verbose) - - writelog("\nEND Chem CSI Function\n", logfile = logfile, verbose = verbose) - writelog("----------------------------------------------------------------------------------------------------\n", logfile = logfile, verbose = verbose) + # write to the code portion of the logs - provide final data for download + writelog( + "", + code = ' + chemdata_csi.final <- chemdata_csi2 %>% + # Remove unnecessary columns - weighted_score_sum and weight + select(-c(weighted_score_sum, weight)) %>% + + # Assign category based on the bins specified in page 42 of Technical Manual (3rd edition from June 2021) + mutate( + `Category Score` = case_when( + Score < 1.69 ~ 1, + (Score >= 1.69) & (Score <= 2.33) ~ 2, + (Score >= 2.33) & (Score <= 2.99) ~ 3, + Score > 2.99 ~ 4, + TRUE ~ NA_real_ + ), + Category = case_when( + `Category Score` == 1 ~ "Minimal Exposure", + `Category Score` == 2 ~ "Low Exposure", + `Category Score` == 3 ~ "Moderate Exposure", + `Category Score` == 4 ~ "High Exposure", + TRUE ~ NA_character_ + ) + ) %>% + + # To identify that the score is for the Chemical Score Index + mutate( + Index = "CSI" + ) %>% + + # Ordering the columns + select( + StationID, Index, Score, Category, `Category Score` + ) + ', + data = chemdata_csi.final, + logfile = logfile, + verbose = verbose + ) + # Serve it up for download + create_download_link(data = chemdata_csi.final, logfile = logfile, filename = 'CSI_Final.csv', linktext = 'Download CSI Final (Final DataFrame within CSI Function)', verbose = verbose) - return(chemdata_csi) + return(chemdata_csi.final) } # ------------ Integrated Score -------------- @@ -304,35 +690,152 @@ CSI <- function(chemdata, preprocessed = F, logfile = file.path(getwd(), 'logs', #' chem.sqo(chem_sampledata) # get scores and see output #' #' @export -chem.sqo <- function(chemdata, logfile = file.path(getwd(), 'logs', format(Sys.time(), "%Y-%m-%d_%H:%M:%S"), 'log.txt' ), verbose = T) { +chem.sqo <- function(chemdata, logfile = file.path(getwd(), 'logs', format(Sys.time(), "%Y-%m-%d_%H:%M:%S"), 'chemlog.Rmd' ), verbose = T) { - # Initialize Logging + # ---- Initialize Logging ---- init.log(logfile, base.func.name = sys.call(), current.time = Sys.time(), is.base.func = length(sys.calls()) == 1, verbose = verbose) hyphen.log.prefix <- rep('-', (2 * (length(sys.calls))) - 1) - writelog("\nBEGIN Chem SQO Function\n", logfile = logfile, verbose = verbose) + writelog("\n# Chemistry SQO Logs\n", logfile = logfile, verbose = verbose) + writelog("\n## Chemistry SQO Main Function\n", logfile = logfile, verbose = verbose) + - writelog("About to preprocess chemistry data", logfile = logfile, verbose = verbose, prefix = hyphen.log.prefix) + # ---- Save the raw input to an RData file (for the sake of those who want the auditing logs) ---- + rawinput.filename <- 'chem.sqo.input.RData' + if (verbose) { + save(chemdata, file = file.path( dirname(logfile), rawinput.filename )) + } + + # Display raw input data, create a download link for the knitted final RMarkdown output + writelog( + "\n### Raw input to chem.sqo:", + logfile = logfile, + code = paste0("load('", rawinput.filename, "') ### This will load a dataframe called 'chemdata' into your environment"), + verbose = verbose + ) + create_download_link(data = chemdata, logfile = logfile, filename = 'ChemSQO-RawInput.csv', linktext = 'Download Raw Input to Chem SQO Function', verbose = verbose) + + + + # ---- Make the call to the Preprocessing function ---- + # Write to the log file + writelog( + "\n### Preprocessing chemistry data (Details within chemdata_prep to be shown later as well):\n", + logfile = logfile, + verbose = verbose + ) + + # Actually call the function chemdata <- chemdata_prep(chemdata, logfile = logfile, verbose = verbose) - writelog("About to call LRM function within chem.sqo", logfile = logfile, verbose = verbose, prefix = hyphen.log.prefix) + # Create code block and download link to the preprocessed data + writelog( + "\n### Chemdata Pre processing function is finished executing - Here is its final output along with a code block (for R Studio users):", + logfile = logfile, + code = 'chemdata <- chemdata_prep(chemdata, verbose = FALSE)', + data = chemdata, + verbose = verbose + ) + create_download_link(data = chemdata, logfile = logfile, filename = 'ChemSQO-PreProcessedInput.csv', linktext = 'Download Preprocessed Input to Chem SQO Function (after calling chemdata_prep)', verbose = verbose) + + + + # ---- Make the call to the LRM function ---- + # Write to the log file + writelog( + "\n### Call LRM function within chem.sqo....\n", + logfile = logfile, + verbose = verbose + ) + + # Actually call it chemdata_lrm <- LRM(chemdata, preprocessed = T, logfile = logfile, verbose = verbose) - writelog("About to call CSI function within chem.sqo", logfile = logfile, verbose = verbose, prefix = hyphen.log.prefix) + # Create code block and download link + writelog( + "\n### LRM Function finished executing", + logfile = logfile, + verbose = verbose + ) + writelog( + "##### Here is its final output along with a code block (for R Studio users):", + logfile = logfile, + code = 'chemdata_lrm <- LRM(chemdata, preprocessed = TRUE, verbose = FALSE)', + data = chemdata_lrm, + verbose = verbose + ) + create_download_link(data = chemdata_lrm, logfile = logfile, filename = 'ChemSQO-LRM-Output.csv', linktext = 'Download Final LRM Output', verbose = verbose) + + + + # ---- Make the call to the CSI function ---- + # Write to the log file + writelog( + "\n### Call CSI function within chem.sqo\n", + logfile = logfile, + verbose = verbose + ) + + # Actually call it chemdata_csi <- CSI(chemdata, preprocessed = T, logfile = logfile, verbose = verbose) - # We should probably put some checks on the input data here - # if it doesn't meet requirements, call stop function + # Create code block and download link + writelog( + "\n### CSI Function finished executing", + logfile = logfile, + verbose = verbose + ) + writelog( + "##### Here is its final output along with a code block (for R Studio users):", + logfile = logfile, + code = 'chemdata_csi <- CSI(chemdata, preprocessed = TRUE, verbose = FALSE)', + data = chemdata_csi, + verbose = verbose + ) + create_download_link(data = chemdata_csi, logfile = logfile, filename = 'ChemSQO-CSI-Output.csv', linktext = 'Download Final CSI Output', verbose = verbose) + + + + + + # ---- Subsequent steps after CSI and LRM ---- + writelog( + "\n## Subsequent Steps After CSI and LRM", + logfile = logfile, + verbose = verbose + ) + - writelog("RBind LRM and CSI dataframes", logfile = logfile, verbose = verbose, prefix = hyphen.log.prefix) - writelog("Assign the Score according to the mean of the CSI and LRM score", logfile = logfile, verbose = verbose, prefix = hyphen.log.prefix) - writelog("If CSI and LRM are one step apart (for example, the CSI is 4 and the LRM is 3), take the ceiling of the mean (which basically would equate to the max of the two)", logfile = logfile, verbose = verbose, prefix = hyphen.log.prefix) - writelog("The column called 'Category Score' will be the same as the score", logfile = logfile, verbose = verbose, prefix = hyphen.log.prefix) - writelog("The column called 'Category' is the 'human readable' SQO Assessment (Minimal, Low, Moderate, High)", logfile = logfile, verbose = verbose, prefix = hyphen.log.prefix) - writelog("The Final output dataframe will have the overall Chem Assessment score, along with the individual CSI and LRM values", logfile = logfile, verbose = verbose, prefix = hyphen.log.prefix) + # ---- First Subsequent Step: Rbind the CSI and LRM Dataframes ---- - combined <- rbind(chemdata_lrm, chemdata_csi) %>% - arrange(StationID) %>% + # Actually execute the code + combined1 <- rbind(chemdata_lrm, chemdata_csi) %>% + arrange(StationID) + + # Write to the logs and make the code block and datatable display + writelog( + "\n#### First Subsequent Step: RBind the CSI and LRM Dataframes", + logfile = logfile, + code = ' + combined1 <- rbind(chemdata_lrm, chemdata_csi) %>% + arrange(StationID) + ', + data = combined1, + verbose = verbose + ) + + # Make the download link + create_download_link(data = combined1, logfile = logfile, filename = 'CSI_LRM_Combine_step1.csv', linktext = 'Download CSI_LRM_Combine_step1.csv', verbose = verbose) + + + + + + # ---- Second Subsequent Step: Group by StationID and take the average of CSI and LRM ---- + # --- If the average is not an integer, take the ceiling of it (round it up) + + # Here, we actually execute the code + combined2 <- combined1 %>% group_by(StationID) %>% summarize( # we call ceiling because we are always wanting to round up @@ -345,7 +848,44 @@ chem.sqo <- function(chemdata, logfile = file.path(getwd(), 'logs', format(Sys.t # na.rm is FALSE by default - so I'm not sure why it is specified as false here - It is the same exact thing `Category Score` = ceiling(mean(`Category Score`, na.rm = F)) # Category Score is the same as the Score in this case ) %>% - ungroup() %>% + ungroup() + + # Write to the log + writelog( + "\n#### Second Subsequent Step: Group by StationID and take the average of CSI and LRM", + logfile = logfile, + code = ' + combined2 <- combined1 %>% + group_by(StationID) %>% + summarize( + # we call ceiling because we are always wanting to round up + # err on the side of determining that a site is more impacted, rather than not + # na.rm is FALSE by default - so I\'m not sure why it is specified as false in the Category Score one - It is the same exact thing + Score = ceiling(mean(`Category Score`)), + + # na.rm = F, since we need both CSI and LRM to determine the Chemistry LOE score + # although if you can calculate one, you should be able to calculate the other as well + # na.rm is FALSE by default - so I\'m not sure why it is specified as false here - It is the same exact thing + `Category Score` = ceiling(mean(`Category Score`, na.rm = F)) # Category Score is the same as the Score in this case + + ) %>% + ungroup() + ', + data = combined2, + verbose = verbose + ) + + # Make the download link + create_download_link(data = combined2, logfile = logfile, filename = 'CSI_LRM_Combine_step2.csv', linktext = 'Download CSI_LRM_Combine_step2.csv', verbose = verbose) + + + + + + # ---- Third Subsequent Step: Convert numeric score to the human readable category ---- + + # Actually execute the code + combined3 <- combined2 %>% mutate( Index = "Integrated SQO", Category = case_when( @@ -358,20 +898,67 @@ chem.sqo <- function(chemdata, logfile = file.path(getwd(), 'logs', format(Sys.t ) %>% select( StationID, Index, Score, Category, `Category Score` - ) %>% + ) + + # Write to the log + writelog( + "\n#### Third Subsequent Step: Convert numeric score to the human readable category", + logfile = logfile, + code = ' + combined3 <- combined2 %>% + mutate( + Index = "Integrated SQO", + Category = case_when( + Score == 1 ~ "Minimal Exposure", + Score == 2 ~ "Low Exposure", + Score == 3 ~ "Moderate Exposure", + Score == 4 ~ "High Exposure", + TRUE ~ NA_character_ + ) + ) %>% + select( + StationID, Index, Score, Category, `Category Score` + ) + ', + data = combined3, + verbose = verbose + ) + + # Make the download link + create_download_link(data = combined3, logfile = logfile, filename = 'CSI_LRM_Combine_step3.csv', linktext = 'Download CSI_LRM_Combine_step3.csv', verbose = verbose) + + + + # ---- Fourth Subsequent Step: Combine CSI, LRM, and Integrated Score dataframes, convert factors to characters, and order it by StationID ---- + combined.final <- combined3 %>% rbind(chemdata_lrm, chemdata_csi) %>% mutate_if(is.factor,as.character) %>% arrange( StationID, Index ) - writelog("*** DATA *** Final Chem SQO Dataframe in CHEMISTRY-SQO-FINAL.csv", logfile = logfile, verbose = verbose, prefix = hyphen.log.prefix) - writelog(combined, logfile = file.path(dirname(logfile), 'CHEMISTRY-SQO-FINAL.csv'), filetype = 'csv', verbose = verbose) + # Write to the log + writelog( + "\n#### Fourth Subsequent Step: Combine CSI, LRM, and Integrated Score dataframes, convert factors to characters, and order it by StationID", + logfile = logfile, + code = ' + combined.final <- combined3 %>% + rbind(chemdata_lrm, chemdata_csi) %>% + mutate_if(is.factor,as.character) %>% + arrange( + StationID, Index + ) + ', + data = combined.final, + verbose = verbose + ) + + # Make the download link + create_download_link(data = combined.final, logfile = logfile, filename = 'CSI_LRM_Combine_step3.csv', linktext = 'Download CSI_LRM_Combine_step3.csv', verbose = verbose) writelog("\nEND Chem SQO Function\n", logfile = logfile, verbose = verbose) - writelog("----------------------------------------------------------------------------------------------------\n", logfile = logfile, verbose = verbose) - return(combined) + return(combined.final) } @@ -406,129 +993,317 @@ chem.sqo <- function(chemdata, logfile = file.path(getwd(), 'logs', format(Sys.t #' chemdata_prep(chem_sampledata) # get scores and see output #' #' @export -chemdata_prep <- function(chem, logfile = file.path(getwd(), 'logs', format(Sys.time(), "%Y-%m-%d_%H:%M:%S"), 'log.txt' ), verbose = T){ +chemdata_prep <- function(chemdata_prep.input, logfile = file.path(getwd(), 'logs', format(Sys.time(), "%Y-%m-%d_%H:%M:%S"), 'chemlog.Rmd' ), verbose = T){ # Initialize Logging init.log(logfile, base.func.name = sys.call(), current.time = Sys.time(), is.base.func = length(sys.calls()) == 1, verbose = verbose) hyphen.log.prefix <- rep('-', (2 * (length(sys.calls))) - 1) - writelog("\nBEGIN Function: chemdata_prep\n", logfile = logfile, verbose = verbose) + writelog("\n\n## Chemistry Preprocessing function (chemdata_prep)\n", logfile = logfile, verbose = verbose) + + + # ---- Save the raw input to an RData file (for the sake of those who want the auditing logs) ---- + rawinput.filename <- 'chemdata_prep.input.RData' + if (verbose) { + save(chemdata_prep.input, file = file.path( dirname(logfile), rawinput.filename )) + } + + writelog( + "\n### Initial input to chemdata_prep:", + logfile = logfile, + code = paste0("load('", rawinput.filename, "')"), + data = chemdata_prep.input, + verbose = verbose + ) + create_download_link(data = chemdata_prep.input, logfile = logfile, filename = 'chemdata_prep.input.csv', linktext = 'Download Initial Input to Chem Preprocessing Function', verbose = verbose) + # Here chemdata consists of data in the same format as our database, with the columns # stationid, analytename, result, rl, mdl - writelog("*** DATA *** Chemistry data as fed into the function chemdata_prep may be found in chemdata-preprocessing-step0.csv", logfile = logfile, verbose = verbose, prefix = hyphen.log.prefix) - writelog(chem, file.path(dirname(logfile), 'chemdata-preprocessing-step0.csv'), filetype = 'csv', verbose = verbose) - # lowercase column names - names(chem) <- names(chem) %>% tolower() - writelog("Lowercase column names of input data", logfile = logfile, verbose = verbose, prefix = hyphen.log.prefix) + + # ---- Step 1: lowercase column names ---- + writelog( + "\n#### Step 1: Lowercase column names of input data:", + logfile = logfile, + verbose = verbose + ) + names(chemdata_prep.input) <- names(chemdata_prep.input) %>% tolower() writelog( - "*** DATA *** Chemistry data after lowercase of column names may be found in the function chemdata_prep may be found in chemdata-preprocessing-step1.1.csv", + "", logfile = logfile, + code = 'names(chemdata_prep.input) <- names(chemdata_prep.input) %>% tolower()', + data = chemdata_prep.input, verbose = verbose, - prefix = hyphen.log.prefix + pageLength = 5 ) - writelog(chem, file.path(dirname(logfile), 'chemdata-preprocessing-step1.1.csv'), filetype = 'csv', verbose = verbose) + create_download_link(data = chemdata_prep.input, logfile = logfile, filename = 'chemdata_prep.input.step1.csv', linktext = 'Download Preprocessing Data (Step 1)', verbose = verbose) + + + + + - writelog("Chemistry data after renaming fielddup colnames to 'fieldrep' may be found in chemdata-preprocessing-step1.2.csv", logfile = logfile, verbose = verbose, prefix = hyphen.log.prefix) - if ( 'fieldduplicate' %in% names(chem) ) { - chem <- chem %>% rename(fieldrep = fieldduplicate) + # ---- Step 2: Rename fieldduplicate columns to "fieldrep" ---- + writelog("\n#### Step 2: Renaming fielddup colnames to 'fieldrep'", logfile = logfile, verbose = verbose) + if ( 'fieldduplicate' %in% names(chemdata_prep.input) ) { + chemdata_prep.input <- chemdata_prep.input %>% rename(fieldrep = fieldduplicate) } - if ( 'fieldreplicate' %in% names(chem) ) { - chem <- chem %>% rename(fieldrep = fieldreplicate) + if ( 'fieldreplicate' %in% names(chemdata_prep.input) ) { + chemdata_prep.input <- chemdata_prep.input %>% rename(fieldrep = fieldreplicate) } - if ( 'fielddup' %in% names(chem) ) { - chem <- chem %>% rename(fieldrep = fielddup) + if ( 'fielddup' %in% names(chemdata_prep.input) ) { + chemdata_prep.input <- chemdata_prep.input %>% rename(fieldrep = fielddup) } - writelog(chem, file.path(dirname(logfile), 'chemdata-preprocessing-step1.2.csv'), filetype = 'csv', verbose = verbose) + # Write code to the logs + writelog( + "", + logfile = logfile, + code = " + if ( 'fieldduplicate' %in% names(chemdata_prep.input) ) { + chemdata_prep.input <- chemdata_prep.input %>% rename(fieldrep = fieldduplicate) + } + if ( 'fieldreplicate' %in% names(chemdata_prep.input) ) { + chemdata_prep.input <- chemdata_prep.input %>% rename(fieldrep = fieldreplicate) + } + if ( 'fielddup' %in% names(chemdata_prep.input) ) { + chemdata_prep.input <- chemdata_prep.input %>% rename(fieldrep = fielddup) + } + ", + data = chemdata_prep.input, + verbose = verbose, + pageLength = 5 + ) + # Serve it up for download + create_download_link(data = chemdata_prep.input, logfile = logfile, filename = 'chemdata_prep.input.step2.csv', linktext = 'Download Preprocessing Data (Step 2)', verbose = verbose) + + - if ('labreplicate' %in% names(chem)) { - writelog("Rename labreplicate to labrep", logfile = logfile, verbose = verbose, prefix = hyphen.log.prefix) - chem <- chem %>% rename(labrep = labreplicate) - } - writelog("*** DATA *** Chemistry data after renaming labrep colnames to 'labrep' may be found in chemdata-preprocessing-step1.3.csv", logfile = logfile, verbose = verbose, prefix = hyphen.log.prefix) - writelog(chem, file.path(dirname(logfile), 'chemdata-preprocessing-step1.3.csv'), filetype = 'csv', verbose = verbose) + + # ---- Step 3: Rename LabReplicate columns to "labrep" ---- + + writelog("\n#### Step 3: Rename labreplicate to labrep", logfile = logfile, verbose = verbose) + + # Actually execute + if ('labreplicate' %in% names(chemdata_prep.input)) { + chemdata_prep.input <- chemdata_prep.input %>% rename(labrep = labreplicate) + } + + # Write code to logs writelog( - "Remove 2nd field/lab replicates from chem data. - chem data before removal of 2nd lab/field reps may be found in chemdata-preprocessing-step2.csv", + "", + code = " + if ('labreplicate' %in% names(chemdata_prep.input)) { + chemdata_prep.input <- chemdata_prep.input %>% rename(labrep = labreplicate) + } + ", + data = chemdata_prep.input, logfile = logfile, verbose = verbose, - prefix = hyphen.log.prefix + pageLength = 5 ) - writelog(chem, file.path(dirname(logfile), 'chemdata-preprocessing-step2.csv'), filetype = 'csv', verbose = verbose) - # SampleTypeCode should be "Result" and LabReplicate should be 1 - # This is not explicitly from the SQO Manual, but rather just based on "Common sense" and guidance from Darrin Greenstein - # We take labreplicate 1 to avoid bias. - # Darrin said on 7/20/2022 "I'd just pick the first rep. That will randomly even out any bias." - # Check for 'sampletypecode' column - if ('sampletypecode' %in% names(chem)) { - chem <- chem %>% - filter(sampletypecode == 'Result') - } else { - msg <- "Warning: Column 'sampletypecode' was not provided - this may affect results if there are duplicate records for certain analytes" - warning(msg) - writelog(msg, logfile = logfile, verbose = verbose, prefix = hyphen.log.prefix) - } + # Serve it up for download + create_download_link(data = chemdata_prep.input, logfile = logfile, filename = 'chemdata_prep.input.step3.csv', linktext = 'Download Preprocessing Data (Step 3)', verbose = verbose) + + + + + + + + # ---- Step 4: Remove the Field/Lab Replicate #2 (Keep only fieldrep/labrep #1) ---- + writelog( + "\n#### Step 4: Remove the Field/Lab Replicate #2 (Keep only fieldrep/labrep #1)", + logfile = logfile, + verbose = verbose + ) + + # Actually Execute the Step # Check for 'labrep' column - if ('labrep' %in% names(chem)) { - chem <- chem %>% + if ('labrep' %in% names(chemdata_prep.input)) { + chemdata_prep.input <- chemdata_prep.input %>% filter(as.numeric(labrep) == 1) } else { msg <- "Warning: Column 'labrep' was not provided - this may affect results if there are duplicate records for certain analytes" warning(msg) - writelog(msg, logfile = logfile, verbose = verbose, prefix = hyphen.log.prefix) + writelog(msg, logfile = logfile, verbose = verbose) } # Check for 'fieldrep' column - if ('fieldrep' %in% names(chem)) { - chem <- chem %>% + if ('fieldrep' %in% names(chemdata_prep.input)) { + chemdata_prep.input <- chemdata_prep.input %>% filter(as.numeric(fieldrep) == 1) } else { msg <- "Warning: Column 'fieldrep' was not provided - this may affect results if there are duplicate records for certain analytes" warning(msg) - writelog(msg, logfile = logfile, verbose = verbose, prefix = hyphen.log.prefix) + writelog(msg, logfile = logfile, verbose = verbose) } - # Check if the resulting chem_sampledata is empty after filtering - if (nrow(chem_sampledata) == 0) { - stop("In chemdata_prep - chemistry input data is empty after filtering sampletypecode == Result and labrep == 1 and fieldrep == 1") + # Write to code portion of the log + writelog( + "", + code = " + # Check for 'labrep' column + if ('labrep' %in% names(chemdata_prep.input)) { + chemdata_prep.input <- chemdata_prep.input %>% + filter(as.numeric(labrep) == 1) + } else { + msg <- \"Warning: Column 'labrep' was not provided - this may affect results if there are duplicate records for certain analytes\" + warning(msg) + writelog(msg, logfile = logfile, verbose = verbose) + } + + # Check for 'fieldrep' column + if ('fieldrep' %in% names(chemdata_prep.input)) { + chemdata_prep.input <- chemdata_prep.input %>% + filter(as.numeric(fieldrep) == 1) + } else { + msg <- \"Warning: Column 'fieldrep' was not provided - this may affect results if there are duplicate records for certain analytes\" + warning(msg) + writelog(msg, logfile = logfile, verbose = verbose) + } + ", + data = chemdata_prep.input, + logfile = logfile, + verbose = verbose, + pageLength = 5 + ) + # Serve it up for download + create_download_link(data = chemdata_prep.input, logfile = logfile, filename = 'chemdata_prep.input.step4.csv', linktext = 'Download Preprocessing Data (Step 4)', verbose = verbose) + + + + + + + + + # ---- Step 5: Filter for only "Result" sampletypes (i.e. no matrix spikes, lab blanks, etc - those should definitely not be in here) ---- + writelog( + '\n#### Step 5: Filter for only "Result" sampletypes', + logfile = logfile, + verbose = verbose + ) + writelog( + '\n##### (i.e. no matrix spikes, lab blanks, etc - those should definitely not be in here)', + logfile = logfile, + verbose = verbose + ) + + # SampleTypeCode should be "Result" + # This is not explicitly from the SQO Manual, but rather just based on "Common sense" and guidance from Darrin Greenstein + # We take labreplicate 1 to avoid bias. + # Darrin said on 7/20/2022 "I'd just pick the first rep. That will randomly even out any bias." + # Check for 'sampletypecode' column + if ('sampletypecode' %in% names(chemdata_prep.input)) { + chemdata_prep.input <- chemdata_prep.input %>% + filter(sampletypecode == 'Result') + } else { + msg <- "Warning: Column 'sampletypecode' was not provided - this may affect results if there are duplicate records for certain analytes" + warning(msg) + writelog(msg, logfile = logfile, verbose = verbose) } - # Drop duplicates - chemdupes <- chem[(chem %>% select(stationid, analytename, sampletypecode, labrep, fieldrep) %>% duplicated), ] - chem <- chem[!(chem %>% select(stationid, analytename, sampletypecode, labrep, fieldrep) %>% duplicated), ] + # Write to code portion of the logs + writelog( + "", + code = ' + if (\'sampletypecode\' %in% names(chemdata_prep.input)) { + chemdata_prep.input <- chemdata_prep.input %>% + filter(sampletypecode == \'Result\') + } else { + msg <- "Warning: Column \'sampletypecode\' was not provided - this may affect results if there are duplicate records for certain analytes" + warning(msg) + } + ', + data = chemdata_prep.input, + logfile = logfile, + verbose = verbose, + pageLength = 5 + ) + # Serve it up for download + create_download_link(data = chemdata_prep.input, logfile = logfile, filename = 'chemdata_prep.input.step5.csv', linktext = 'Download Preprocessing Data (Step 5)', verbose = verbose) + + + # ---- Step 6: Check if the resulting chemdata_prep.input is empty after filtering ---- + writelog( + "\n#### Step 6: Check if the resulting chemdata_prep.input is empty after filtering", + logfile = logfile, + verbose = verbose + ) + if (nrow(chemdata_prep.input) == 0) { + stop("In chemdata_prep - chemistry input data is empty after filtering sampletypecode == Result and labrep == 1 and fieldrep == 1") + } + + # Write to code portion of the logs writelog( - "Dropping duplicates from chem - chem data AFTER removal of duplicates may be found in chemdata-preprocessing-step3.csv", + "", + code = ' + if (nrow(chemdata_prep.input) == 0) { + stop("In chemdata_prep - chemistry input data is empty after filtering sampletypecode == Result and labrep == 1 and fieldrep == 1") + } + ', + data = chemdata_prep.input, logfile = logfile, verbose = verbose, - prefix = hyphen.log.prefix + pageLength = 5 + ) + # Serve it up for download + create_download_link(data = chemdata_prep.input, logfile = logfile, filename = 'chemdata_prep.input.step6.csv', linktext = 'Download Preprocessing Data (Step 6)', verbose = verbose) + + + + + # ---- Step 7: Drop duplicates ---- + writelog( + "\n#### Step 7: Drop Duplicates", + logfile = logfile, + verbose = verbose ) - writelog(chem, file.path(dirname(logfile), 'chemdata-preprocessing-step3.csv'), filetype = 'csv', verbose = verbose) + # Isolate duplicates for display purposes + chemdupes <- chemdata_prep.input[(chemdata_prep.input %>% select(stationid, analytename, sampletypecode, labrep, fieldrep) %>% duplicated), ] writelog( - "Dropping duplicates from chem - chem data AFTER removal of duplicates may be found in chemdata-removed-duplicate-data.csv", + "\n##### Duplicate records:", + code = 'chemdupes <- chemdata_prep.input[(chemdata_prep.input %>% select(stationid, analytename, sampletypecode, labrep, fieldrep) %>% duplicated), ]', + data = chemdupes, logfile = logfile, verbose = verbose, - prefix = hyphen.log.prefix + pageLength = 5 ) - writelog(chem, file.path(dirname(logfile), 'chemdata-removed-duplicate-data.csv'), filetype = 'csv', verbose = verbose) + writelog(chemdata_prep.input, file.path(dirname(logfile), 'chemdata-preprocessing-step3.csv'), filetype = 'csv', verbose = verbose) + + # Purge duplicates from actual input data + chemdata_prep.input <- chemdata_prep.input[!(chemdata_prep.input %>% select(stationid, analytename, sampletypecode, labrep, fieldrep) %>% duplicated), ] writelog( - "(To be clear - these are records that were duplicated on stationid, analytename, sampletypecode, labrep, fieldrep)", logfile = logfile, verbose = verbose, prefix = hyphen.log.prefix + "\n##### Chemdata Prep Input Data with Duplicates Removed:", + code = 'chemdata_prep.input <- chemdata_prep.input[!(chemdata_prep.input %>% select(stationid, analytename, sampletypecode, labrep, fieldrep) %>% duplicated), ]', + data = chemdata_prep.input, + logfile = logfile, + verbose = verbose, + pageLength = 5 ) + # Serve them up for download + create_download_link(data = chemdupes, logfile = logfile, filename = 'chemdata_prep.dupes.csv', linktext = 'Download Duplicated Records', verbose = verbose) + create_download_link(data = chemdata_prep.input, logfile = logfile, filename = 'chemdata_prep.input.step7.csv', linktext = 'Download Preprocessing Data (Step 7 - removed duplicates)', verbose = verbose) + - writelog("Converting Result, RL, MDL to numeric fields", logfile = logfile, verbose = verbose, prefix = hyphen.log.prefix) - # result, rl, mdl should be numeric fields - chem <- chem %>% + # ---- Step 8: Convert Result, RL, MDL to numeric fields ---- + writelog("\n#### Convert Result, RL, MDL to numeric fields", logfile = logfile, verbose = verbose) + + # Actually execute the code + chemdata_prep.input <- chemdata_prep.input %>% mutate( result = as.numeric(result), rl = as.numeric(rl), @@ -536,93 +1311,159 @@ chemdata_prep <- function(chem, logfile = file.path(getwd(), 'logs', format(Sys. ) %>% filter(!is.na(stationid)) + # Write to the code portion of the logs writelog( - "*** DATA *** Chem data after conversion of Result, RL, MDL may be found in chemdata-preprocessing-step4.csv", + "", + code = ' + chemdata_prep.input <- chemdata_prep.input %>% + mutate( + result = as.numeric(result), + rl = as.numeric(rl), + mdl = as.numeric(mdl), + ) %>% + filter(!is.na(stationid)) + ', + data = chemdata_prep.input, logfile = logfile, verbose = verbose, - prefix = hyphen.log.prefix + pageLength = 5 ) - writelog(chem, file.path(dirname(logfile), 'chemdata-preprocessing-step4.csv'), filetype = 'csv', verbose = verbose) + # Serve it up for download + create_download_link(data = chemdata_prep.input, logfile = logfile, filename = 'chemdata_prep.input.step8.csv', linktext = 'Download Preprocessing Data (Step 8)', verbose = verbose) + + + + + + + + + # ------------------------------------------------------------------- Relevant Compounds --------------------------------------------------------------------------- + + + writelog("\n#### Analytes used in Chem SQO Calc (Based on Table 3.1 in SQO Manual - pages 19 and 20):", logfile = logfile, verbose = verbose) # Analytes that are not grouped in any particular category single_analytes <- c('Cadmium','Copper','Lead','Mercury','Zinc', 'alpha-Chlordane','gamma-Chlordane','trans-Nonachlor',"4,4'-DDT") + writelog( + "\n##### Single Compounds", + code = ' + # Analytes that are not grouped in any particular category + single_analytes <- c(\'Cadmium\',\'Copper\',\'Lead\',\'Mercury\',\'Zinc\', + \'alpha-Chlordane\',\'gamma-Chlordane\',\'trans-Nonachlor\',"4,4\'-DDT") + ', + logfile = logfile, + verbose = verbose + ) # High PAH # Table 3.1 in the SQO Manual (page 19) hpah <- c('Benz(a)anthracene', 'Benzo(a)pyrene', 'Benzo(e)pyrene', 'Chrysene', 'Dibenz(a,h)anthracene', 'Fluoranthene', 'Perylene','Pyrene') + writelog( + "\n##### High PAHs", + code = " + # High PAH + # Table 3.1 in the SQO Manual (page 19) + hpah <- c('Benz(a)anthracene', 'Benzo(a)pyrene', 'Benzo(e)pyrene', + 'Chrysene', 'Dibenz(a,h)anthracene', 'Fluoranthene', 'Perylene','Pyrene') + ", + logfile = logfile, + verbose = verbose + ) # Low PAH # Table 3.1 in the SQO Manual (page 19) lpah <- c('1-Methylnaphthalene', '1-Methylphenanthrene', '2,6-Dimethylnaphthalene', '2-Methylnaphthalene', 'Acenaphthene', 'Anthracene', 'Biphenyl', 'Fluorene', 'Naphthalene', 'Phenanthrene') + writelog( + "\n##### Low PAHs", + code = " + # Low PAH + # Table 3.1 in the SQO Manual (page 19) + lpah <- c('1-Methylnaphthalene', '1-Methylphenanthrene', '2,6-Dimethylnaphthalene', + '2-Methylnaphthalene', 'Acenaphthene', 'Anthracene', + 'Biphenyl', 'Fluorene', 'Naphthalene', 'Phenanthrene') + ", + logfile = logfile, + verbose = verbose + ) # The PCB's that we care about # Table 3.1 in the SQO Manual (page 20) relevant_pcbs <- paste( - 'PCB', c('008', '018', '028', '044', '052', '066', '101', '105', '110', '118', '128', '138', '153', '180', '187', '195'), sep = "-" + 'PCB', c('008', '018', '028', '044', '052', '066', '101', '105', '110', '118', '128', '138', '153', '180', '187', '195'), sep = '-' ) + writelog( + "\n##### PCB Compounds Which are used in Chemistry SQO", + code = " + # The PCB's that we care about + # Table 3.1 in the SQO Manual (page 20) + relevant_pcbs <- paste( + 'PCB', c('008', '018', '028', '044', '052', '066', '101', '105', '110', '118', '128', '138', '153', '180', '187', '195'), sep = '-' + ) + ", + logfile = logfile, + verbose = verbose + ) + + # (For the sake of Logging) + # Find the maximum length of the columns + max_length <- max(length(single_analytes), length(hpah), length(lpah), length(relevant_pcbs)) - writelog("Analytes used in Chem SQO Calc (Based on Table 3.1 in SQO Manual - pages 19 and 20):", logfile = logfile, verbose = verbose, prefix = hyphen.log.prefix) + # Create the data frame + analytes_df <- data.frame( + `Single Compounds` = c(single_analytes, rep(NA, max_length - length(single_analytes)) ), + `High PAHs` = c(hpah, rep(NA, max_length - length(hpah))), + `Low PAHs` = c(lpah, rep(NA, max_length - length(lpah))), + `PCBs` = c(relevant_pcbs, rep(NA, max_length - length(relevant_pcbs))) + ) writelog( - paste( - c(single_analytes, hpah, lpah, relevant_pcbs), - collapse = ', ' - ), + "\n##### Table of All compounds used in Chemistry SQO calculation (Except DDDs, DDEs and 2,4'-DDT)", + logfile = logfile, + verbose = verbose + ) + writelog( + "\n##### These are handled separately since they are aggregated.", + logfile = logfile, + verbose = verbose + ) + writelog( + "\n##### Furthermore - the final preprocessed data needs 4,4'-DDT by itself as well as the total DDTs, so those must be taken care of separately", + code = ' + # Find the maximum length of the columns + max_length <- max(length(single_analytes), length(hpah), length(lpah), length(relevant_pcbs)) + + # Create the data frame + analytes_df <- data.frame( + `Single Compounds` = c(single_analytes, rep(NA, max_length - length(single_analytes)) ), + `High PAHs` = c(hpah, rep(NA, max_length - length(hpah))), + `Low PAHs` = c(lpah, rep(NA, max_length - length(lpah))), + `PCBs` = c(relevant_pcbs, rep(NA, max_length - length(relevant_pcbs))) + ) + ', + data = analytes_df, logfile = logfile, verbose = verbose, - prefix = hyphen.log.prefix + pageLength = 16 ) - writelog("To be clear - the script is filtering the chemistry data for the above analytes", logfile = logfile, verbose = verbose, prefix = hyphen.log.prefix) - writelog('\n', logfile = logfile, verbose = verbose, prefix = hyphen.log.prefix) + # ----------------------------------------------------------------------------------------------------------------------------------------------------------------- # - writelog("These analytes have no grouping and are handled individually", logfile = logfile, verbose = verbose, prefix = hyphen.log.prefix) - writelog(paste(single_analytes, collapse = ', '),logfile = logfile,verbose = verbose,prefix = hyphen.log.prefix) - writelog("\nThese analytes are the High PAHs", logfile = logfile, verbose = verbose, prefix = hyphen.log.prefix) - writelog(paste(hpah, collapse = ', '),logfile = logfile,verbose = verbose,prefix = hyphen.log.prefix) - writelog("\nThese analytes are the Low PAHs", logfile = logfile, verbose = verbose, prefix = hyphen.log.prefix) - writelog(paste(lpah, collapse = ', '),logfile = logfile,verbose = verbose,prefix = hyphen.log.prefix) - writelog("\nThese analytes are the the PCBs used in SQO analysis", logfile = logfile, verbose = verbose, prefix = hyphen.log.prefix) - writelog(paste(relevant_pcbs, collapse = ', '),logfile = logfile,verbose = verbose,prefix = hyphen.log.prefix) - writelog('\n', logfile = logfile, verbose = verbose, prefix = hyphen.log.prefix) - # The other group is the "DD's" DDT, DDE, DDD - # Can be done with grepl - # Version 0.3.0 update (2024-05-13) - remove duplicates but give a warning to the user - Robert Butler - # check for duplicates and issue a warning: - if (any(chem %>% duplicated())) { - warning("Duplicates detected in chemistry data. They will be removed in the calculation. Please check your input data.") - writelog( - "Duplicates detected in chemistry data. They will be removed in the calculation. Please check your input data." - , - logfile = logfile, - verbose = verbose, - prefix = paste(hyphen.log.prefix, 'WARNING: ') - ) - chem <- chem %>% distinct() - writelog( - "*** DATA *** Duplicates removed - chemdata-preprocessing-step4.1.csv" - , - logfile = logfile, - verbose = verbose, - prefix = paste(hyphen.log.prefix, 'WARNING: ') - ) - writelog(chem, file.path(dirname(logfile), 'chemdata-preprocessing-step4.1.csv'), filetype = 'csv', verbose = verbose) - } writelog("*** DATA *** Before filtering, grouping, converting missing result values - chemdata-preprocessing-step5.csv",logfile = logfile,verbose = verbose,prefix = hyphen.log.prefix) - writelog(chem, file.path(dirname(logfile), 'chemdata-preprocessing-step5.csv'), filetype = 'csv', verbose = verbose) + writelog(chemdata_prep.input, file.path(dirname(logfile), 'chemdata-preprocessing-step5.csv'), filetype = 'csv', verbose = verbose) - writelog("filtering chemistry data to necessary analytes - etc", logfile = logfile, verbose = verbose, prefix = hyphen.log.prefix) - writelog("from step 5 to 6 this is what is performed:", logfile = logfile, verbose = verbose, prefix = hyphen.log.prefix) + writelog("filtering chemistry data to necessary analytes - etc", logfile = logfile, verbose = verbose) + writelog("from step 5 to 6 this is what is performed:", logfile = logfile, verbose = verbose) writelog("Create a new column called compound which represents the group of the analyte (High/Low PAH, Total PCB, DDD, DDE. (DDTs not handled in this step)", logfile = logfile, verbose = verbose, prefix = paste0('-',hyphen.log.prefix)) writelog("remove records where compound is NA", logfile = logfile, verbose = verbose, prefix = paste0('-',hyphen.log.prefix)) writelog("Handle missing result values according to guidance of pages 30 and 31 of Technical manual", logfile = logfile, verbose = verbose, prefix = paste0('-',hyphen.log.prefix)) @@ -630,8 +1471,22 @@ chemdata_prep <- function(chem, logfile = file.path(getwd(), 'logs', format(Sys. writelog("Multiply PCBs by 1.72", logfile = logfile, verbose = verbose, prefix = paste0('-',hyphen.log.prefix)) writelog("Sum results of grouped constituents/analytes", logfile = logfile, verbose = verbose, prefix = paste0('-',hyphen.log.prefix)) writelog("If all values in the group are non detects - take the max RL", logfile = logfile, verbose = verbose, prefix = paste0('-',hyphen.log.prefix)) - # its hard to think of useful, good variable names - chemdata <- chem %>% + + + + + + + + # ---- Step 9: Filter for relevant compounds (Excluding DDTs which will be handled separately later) ---- + writelog( + "\n#### Step 9: Filter for relevant compounds (Excluding DDTs which will be handled separately later)", + logfile = logfile, + verbose = verbose + ) + + # Actually execute the code + chemdata <- chemdata_prep.input %>% # create a new column called compound. This is what we will group by, mutate( compound = case_when( @@ -654,6 +1509,47 @@ chemdata_prep <- function(chem, logfile = file.path(getwd(), 'logs', format(Sys. !is.na(compound) ) + # Write to code portion of the logs + writelog( + "", + code = ' + chemdata <- chemdata_prep.input %>% + # create a new column called compound. This is what we will group by, + mutate( + compound = case_when( + analytename %in% single_analytes ~ analytename, + analytename %in% relevant_pcbs ~ "PCBs_total", + analytename %in% hpah ~ "HPAH", + analytename %in% lpah ~ "LPAH", + + # The Manual (3rd Edition - page 35) states that the Total DDT\'s DDE\'s and DDD\'s are the sum of 2,4\' and 4,4\' - because those are the only ones measured in Bight + # We decided to leave the script this way + # If for whatever odd reason, some weird compound that shouldnt have even gotten measured (2,2\') made its way into the dataset - we may as well include it + # June 3, 2024 + grepl("DDD",analytename) ~ "DDDs_total", + grepl("DDE",analytename) ~ "DDEs_total", + # The DDT total will be handled separately + TRUE ~ NA_character_ + ) + ) %>% + filter( + !is.na(compound) + ) + ', + data = chemdata, + logfile = logfile, + verbose = verbose + ) + # Serve it up for download + create_download_link(chemdata, logfile = logfile, filename = 'chemdata_prep.input.step9.no.ddt.csv', verbose = verbose, linktext = 'Chem Preprocessed Data Filtered (No DDT)') + + + + + + + # ----------------------------------------------------------------- Check Units of Data ----------------------------------------------------------------------- + writelog('\n#### Check the units of the input data', logfile = logfile, verbose = verbose) # v0.3.5 update # June 3, 2024 - check units @@ -679,21 +1575,59 @@ chemdata_prep <- function(chem, logfile = file.path(getwd(), 'logs', format(Sys. stop("There are rows with incorrect units. Please check the following rows:\n", paste(capture.output(print(incorrect_rows)), collapse = "\n")) } else { - writelog("INFO: All units for chemistry input data are correct.",logfile = logfile,verbose = verbose,prefix = hyphen.log.prefix) + writelog("INFO: All units for chemistry input data are correct.",logfile = logfile,verbose = verbose) } } else { warning("The 'units' column is missing from the chemistry input data") - writelog("WARNING: The 'units' column is missing from the chemistry input data",logfile = logfile,verbose = verbose,prefix = hyphen.log.prefix) + writelog("WARNING: The 'units' column is missing from the chemistry input data",logfile = logfile,verbose = verbose) } + # write code to the Log R Markdown + writelog( + "", + code = ' + # June 3, 2024 - check units + # \'Cadmium\',\'Copper\',\'Lead\',\'Mercury\',\'Zinc\' - should be in Parts Per Million (ug/g, ug/g dw, or ppm) + # Everything else should be in Parts Per Billion (ng/g, ng/g dw, or ppb) + + # Define the analytes that should be in ppm + ppm_analytes <- c(\'Cadmium\', \'Copper\', \'Lead\', \'Mercury\', \'Zinc\') + + # Define the acceptable units for ppm and ppb + acceptable_units_ppm <- c(\'ug/g\', \'ug/g dw\', \'ppm\') + acceptable_units_ppb <- c(\'ng/g\', \'ng/g dw\', \'ppb\') + + # Check if the units column exists + if (\'units\' %in% names(chemdata)) { + # Check for rows that do not meet the criteria + incorrect_rows <- chemdata[ + (chemdata$analytename %in% ppm_analytes & !(chemdata$units %in% acceptable_units_ppm)) | + (!chemdata$analytename %in% ppm_analytes & !(chemdata$units %in% acceptable_units_ppb)), ] + + # If there are incorrect rows, stop and display a message + if (nrow(incorrect_rows) > 0) { + stop("There are rows with incorrect units. Please check the following rows:\n", + paste(capture.output(print(incorrect_rows)), collapse = "\n")) + } else { + print("INFO: All units for chemistry input data are correct.") + } + } else { + warning("The \'units\' column is missing from the chemistry input data") + } + + ', + logfile = logfile, + verbose = verbose + ) + # ----------------------------------------------------------------------------------------------------------------------------------------------------- # - writelog("*** DATA *** After filtering irrelevant compounds - chemdata-preprocessing-step6.1.csv",logfile = logfile,verbose = verbose,prefix = hyphen.log.prefix) - writelog(chemdata, file.path(dirname(logfile), 'chemdata-preprocessing-step6.1.csv'), filetype = 'csv', verbose = verbose) - writelog("Dealing with missing result values (non detects)",logfile = logfile,verbose = verbose,prefix = hyphen.log.prefix) - writelog("NA or negative result values are treated as missing values (covers -88, -99 or actual null values)",logfile = logfile,verbose = verbose,prefix = hyphen.log.prefix) + # ---- Step 10: Deal with missing values/Non Detects ---- + writelog("\n#### Step 10: Deal with missing values/Non Detects",logfile = logfile,verbose = verbose) + writelog("\n##### Dealing with missing result values (non detects)",logfile = logfile,verbose = verbose) + writelog("\n##### NA or negative result values are treated as missing values (covers -88, -99 or actual null values)",logfile = logfile,verbose = verbose) chemdata <- chemdata %>% mutate( result = case_when( @@ -713,20 +1647,80 @@ chemdata_prep <- function(chem, logfile = file.path(getwd(), 'logs', format(Sys. ) ) - writelog("*** DATA *** After dealing with missing result values/non detects - chemdata-preprocessing-step6.2.csv",logfile = logfile,verbose = verbose,prefix = hyphen.log.prefix) - writelog(chemdata, file.path(dirname(logfile), 'chemdata-preprocessing-step6.2.csv'), filetype = 'csv', verbose = verbose) + writelog( + "", + code = ' + chemdata <- chemdata %>% + mutate( + result = case_when( + # This is for dealing with Non detects (Page 37 of SQO Manual, Paragraph titled Data Preparation) + # Note that if calculations using non-detected(ND) analytes are necessary, an estimated value must be used. + # One estimation approach is to use 50% of the MDL for any samples with ND results for that analyte; + # however, the previous section should be consulted for addressing ND values within summed groups of constituents. + # NA or negative result values are treated as missing values (covers -88, -99 or actual null values) + (coalesce(result, -88) < 0) & (compound %in% single_analytes) ~ as.numeric(1/2*mdl), + + # For the summed group of constituents, we get the directions of how to deal with them in page 36 of the SQO Manual + # First paragraph below table 3.4 + # "Compounds qualified as non-detected are treated as having a concentration of zero for the purpose of summing" + # NA or negative result values are treated as missing values (covers -88, -99 or actual null values) + (coalesce(result, -88) < 0) & !(compound %in% single_analytes) ~ 0, + TRUE ~ result + ) + ) + ', + data = chemdata, + logfile = logfile, + verbose = verbose + ) + # Serve it up for download + create_download_link(chemdata, logfile = logfile, filename = 'chemdata_prep.input.step10.no.ddt.csv', verbose = verbose, linktext = 'Chem Preprocessed Data Step 10 (Dealing with non detects)') + + # ---- Step 11: Multiply PCB Values by 1.72 ---- + writelog("\n#### Step 11: Multiply PCB Values by 1.72", logfile = logfile, verbose = verbose) + + # Actually execute the code chemdata <- chemdata %>% mutate( result = if_else( - # CASQO manual page 30, below table 3.4 - PCB result value gets multiplied by 1.72 + # CASQO manual page 36 (3rd edition June 2021), below table 3.4 - PCB result value gets multiplied by 1.72 # Modified - check where compound is PCBs_total rather than analytename (2024-05-13 in version 0.3.0 update) - Robert Butler compound == "PCBs_total", 1.72 * result, result ) ) - writelog("*** DATA *** After multiplying PCBs by 1.72 - chemdata-preprocessing-step6.3.csv",logfile = logfile,verbose = verbose,prefix = hyphen.log.prefix) - writelog(chemdata, file.path(dirname(logfile), 'chemdata-preprocessing-step6.3.csv'), filetype = 'csv', verbose = verbose) + # Write code to R Markdown + writelog( + "", + code = ' + chemdata <- chemdata %>% + mutate( + result = if_else( + # CASQO manual page 36 (3rd edition June 2021), below table 3.4 - PCB result value gets multiplied by 1.72 + # Modified - check where compound is PCBs_total rather than analytename (2024-05-13 in version 0.3.0 update) - Robert Butler + compound == "PCBs_total", 1.72 * result, result + ) + ) + ', + data = chemdata, + logfile = logfile, + verbose = verbose + ) + # Serve it up for download + create_download_link(chemdata, logfile = logfile, filename = 'chemdata_prep.input.step11.pcb.mult.no.ddt.csv', verbose = verbose, linktext = 'Chem Preprocessed Data Step 11 (Multiply PCBs by 1.72)') + + + + + + + + + # ---- Step 12: If all are non detects in the group, assign it the max of the RLs ---- + writelog("Step 12: If all are non detects in the group, assign it the max of the RLs", logfile = logfile, verbose = verbose) + + # Actually execute the code chemdata <- chemdata %>% group_by( stationid, compound @@ -741,16 +1735,43 @@ chemdata_prep <- function(chem, logfile = file.path(getwd(), 'logs', format(Sys. ) %>% ungroup() - writelog("Group by stationid and compound and sum the result values - if all are missing take the Max RL (Page 36 from Technical Manual)", logfile = logfile, verbose = verbose, prefix = hyphen.log.prefix) + # Write the code to the logs + writelog( + "\n##### Group by stationid and compound and sum the result values - if all are missing take the Max RL (Page 36 from Technical Manual)", + code = ' + chemdata <- chemdata %>% + group_by( + stationid, compound + ) %>% + summarize( + # if the sum of the results is zero, assign it the max of the RLs + # Page 36 of SQO Plan, first paragraph below table 3.4 + # "If all components of a sum are non-detected, then the highest reporting limit of any one compound in the group should be used to represent the sum value." + result = if_else( + sum(result, na.rm = T) != 0, sum(result, na.rm = T), max(rl) + ) + ) %>% + ungroup() + ', + data = chemdata, + logfile = logfile, + verbose = verbose + ) + # Serve it up for download + create_download_link(chemdata, logfile = logfile, filename = 'chemdata_prep.input.step12.no.ddt.csv', verbose = verbose, linktext = 'Chem Preprocessed Data Step 12 (Handle case where all in analytegroup are Non detects)') + + + + + + - writelog("*** DATA *** chemdata (without DDTs) after performing these steps may be found in chemdata-preprocessing-step6.4.csv", logfile = logfile, verbose = verbose, prefix = hyphen.log.prefix) - writelog(chemdata, file.path(dirname(logfile), 'chemdata-preprocessing-step6.4.csv'), filetype = 'csv', verbose = verbose) - writelog("Handle DDTs (according to page 30 of SQO technical manual", logfile = logfile, verbose = verbose, prefix = hyphen.log.prefix) - writelog("-- replace missing values with zero", logfile = logfile, verbose = verbose, prefix = hyphen.log.prefix) - writelog("-- Sum them - if all are non-detects then take the max RL", logfile = logfile, verbose = verbose, prefix = hyphen.log.prefix) - ddts_total <- chem %>% + writelog("Handle DDTs (according to page 30 of SQO technical manual", logfile = logfile, verbose = verbose) + writelog("-- replace missing values with zero", logfile = logfile, verbose = verbose) + writelog("-- Sum them - if all are non-detects then take the max RL", logfile = logfile, verbose = verbose) + ddts_total <- chemdata_prep.input %>% filter(grepl("DDT",analytename)) %>% mutate( compound = "DDTs_total", @@ -771,7 +1792,7 @@ chemdata_prep <- function(chem, logfile = file.path(getwd(), 'logs', format(Sys. ) ) %>% ungroup() - writelog("*** DATA *** chemdata after performing these steps may be found in chemdata-preprocessing-step7 (DDTs).csv", logfile = logfile, verbose = verbose, prefix = hyphen.log.prefix) + writelog("*** DATA *** chemdata after performing these steps may be found in chemdata-preprocessing-step7 (DDTs).csv", logfile = logfile, verbose = verbose) writelog(ddts_total, file.path(dirname(logfile), 'chemdata-preprocessing-step7 (DDTs).csv'), filetype = 'csv', verbose = verbose) # Conversation with Darrin on April 16th 2020 @@ -786,8 +1807,8 @@ chemdata_prep <- function(chem, logfile = file.path(getwd(), 'logs', format(Sys. # HPAH - 1 # LPAH - 1 # All the rest - 2 - writelog("Combine DDT data with rest of chem data - perform necessary rounding", logfile = logfile, verbose = verbose, prefix = hyphen.log.prefix) - writelog("Copper, Lead, Zinc, High/Low PAHs will be rounded to 1 decimal place; All others will be rounded to 2", logfile = logfile, verbose = verbose, prefix = hyphen.log.prefix) + writelog("Combine DDT data with rest of chem data - perform necessary rounding", logfile = logfile, verbose = verbose) + writelog("Copper, Lead, Zinc, High/Low PAHs will be rounded to 1 decimal place; All others will be rounded to 2", logfile = logfile, verbose = verbose) chemdata <- chemdata %>% bind_rows(ddts_total) %>% arrange(stationid, compound) %>% @@ -803,7 +1824,7 @@ chemdata_prep <- function(chem, logfile = file.path(getwd(), 'logs', format(Sys. ) ) - writelog("*** DATA *** final preprocessed chemistry data in chemdata-preprocessed-final.csv", logfile = logfile, verbose = verbose, prefix = hyphen.log.prefix) + writelog("*** DATA *** final preprocessed chemistry data in chemdata-preprocessed-final.csv", logfile = logfile, verbose = verbose) writelog(chemdata , file.path(dirname(logfile), 'chemdata-preprocessed-final.csv'), filetype = 'csv', verbose = verbose) diff --git a/R/logutils.R b/R/logutils.R index f76decd..df8693e 100644 --- a/R/logutils.R +++ b/R/logutils.R @@ -12,8 +12,9 @@ #' writelog("# --- Test Log Statement --- #", 'logs/log.txt') #' @export -writelog <- function(content, logfile, filetype = 'text', append = T, verbose = T, prefix = NULL, include.row.names = F, code = NULL, data = NULL) { +writelog <- function(content, logfile, filetype = 'text', append = T, verbose = T, prefix = NULL, include.row.names = F, code = NULL, data = NULL, echo.code = TRUE, pageLength = 10) { + if (!verbose) return(NULL) # Only text and csv (for now - ...... don't see any reason why we would support any other type at this point) if ( !(filetype %in% c('text','csv')) ) { @@ -34,21 +35,23 @@ writelog <- function(content, logfile, filetype = 'text', append = T, verbose = # Mainly for R Markdown - write code snippets to be able to view - also data table sections if (!is.null(code)) { - write("```{r}\n", file = file, append = append) - write(code, file = file, append = append) - write("```\n", file = file, append = append) + if (echo.code) { + write("```{r}\n", file = logfile, append = append) + } else { + write("```{r echo=FALSE}\n", file = logfile, append = append) + } + write(code, file = logfile, append = append) + write("```\n", file = logfile, append = append) } if (!is.null(data)) { - write("```{r echo=FALSE}\n", file = file, append = append) - write("datatable(data, options = list(pageLength = 5, autoWidth = TRUE))\n", file = file, append = append) - write("```\n", file = file, append = append) - } + write("```{r echo=FALSE}\n", file = logfile, append = append) + data_name <- deparse(substitute(data)) + write(paste0("datatable(", data_name, ", options = list(pageLength = ", pageLength, ", autoWidth = TRUE))\n"), file = logfile, append = append) + write("```\n", file = logfile, append = append) + } } - - - } @@ -66,7 +69,7 @@ writelog <- function(content, logfile, filetype = 'text', append = T, verbose = #' @examples #' init.log(logfile, sys.call()) #' @export -init.log <- function(logfile, base.func.name, type = 'text', current.time = Sys.time(), is.base.func = T, verbose = T, title = 'Log') { +init.log <- function(logfile, base.func.name, type = 'text', current.time = Sys.time(), is.base.func = T, verbose = T, title = 'Log', libraries = c('rmarkdown')) { logfile = path.expand(logfile) logdir = dirname(logfile) @@ -94,12 +97,15 @@ init.log <- function(logfile, base.func.name, type = 'text', current.time = Sys. logfile ) } else { - write("---\ntitle: \"", title, "\"\noutput: html_document\n---\n\n", file = file) + write("---\ntitle: \"R Markdown Log\"\noutput: html_document\n---", file = logfile) + write('```{r echo=FALSE}\n', file = logfile, append = TRUE) + for (lib in libraries) { + write(paste0('library(', lib, ')'), file = logfile, append = TRUE) + } + write('```', file = logfile, append = TRUE) } } - - return(NULL) } @@ -114,9 +120,11 @@ init.log <- function(logfile, base.func.name, type = 'text', current.time = Sys. #' @param include.rownames should rownames of the dataframe be included in the CSV file? #' #' @export -create_download_link <- function(data, logfile, filename, linktext = 'Download the data', include.rownames = FALSE){ - csvfile <- file.path(dirname(logfile), paste0(filename)) - write.csv(data, csvfile, row.names = include.rownames) - write(paste0("[", linktext , "](./", basename(csvfile), ")"), file = logfile, append = TRUE) +create_download_link <- function(data, logfile, filename, linktext = 'Download the data', include.row.names = FALSE, verbose = TRUE){ + if(verbose){ + csvfile <- file.path(dirname(logfile), paste0(filename)) + write.csv(data, csvfile, row.names = include.row.names) + write(paste0("[", linktext , "](./", basename(csvfile), ")"), file = logfile, append = TRUE) + } } From 3d33c23232d2e69522668ca3347ac9ebe136f8d4 Mon Sep 17 00:00:00 2001 From: Robert Butler Date: Fri, 7 Jun 2024 13:04:41 -0700 Subject: [PATCH 03/10] fix LRM table, fine tune the R Markdown logging --- R/chemistry.R | 586 +++++++++++++++++++++++++++++++++++-------- R/logutils.R | 4 +- data/lrm_table.RData | Bin 476 -> 476 bytes 3 files changed, 481 insertions(+), 109 deletions(-) diff --git a/R/chemistry.R b/R/chemistry.R index 72554c4..39244d8 100644 --- a/R/chemistry.R +++ b/R/chemistry.R @@ -51,6 +51,8 @@ LRM <- function(chemdata.lrm.input, preprocessed = F, logfile = file.path(getwd( create_download_link(data = chemdata.lrm.input, logfile = logfile, filename = 'LRM_InitialInputData.csv', linktext = 'Download Initial Input to Chem LRM Function', verbose = verbose) + # Log the separation space between steps + writelog("\n___\n___\n___" , logfile = logfile, verbose = verbose) # ---- Make the call to the Preprocessing function (If not already preprocessed) ---- @@ -62,9 +64,17 @@ LRM <- function(chemdata.lrm.input, preprocessed = F, logfile = file.path(getwd( verbose = verbose ) + # Log the separation space between steps + writelog("\n___\n___\n___\n___" , logfile = logfile, verbose = verbose) + + # Actually call the function chemdata.lrm.input <- chemdata_prep(chemdata.lrm.input, logfile = logfile, verbose = verbose) + + # Log the separation space between steps + writelog("\n___\n___\n___\n___" , logfile = logfile, verbose = verbose) + # Create code block and download link to the preprocessed data writelog( "\n#### Chemdata Pre processing function is finished executing - Here is its final output along with a code block (for R Studio users):", @@ -116,7 +126,8 @@ LRM <- function(chemdata.lrm.input, preprocessed = F, logfile = file.path(getwd( - + # Log the separation space between steps + writelog("\n___\n___\n___" , logfile = logfile, verbose = verbose) writelog("\n### Manipulate the LRM chem data per Technical Manual page 37-40", logfile = logfile, verbose = verbose) @@ -159,6 +170,10 @@ LRM <- function(chemdata.lrm.input, preprocessed = F, logfile = file.path(getwd( + # Log the separation space between steps + writelog("\n___\n___\n___" , logfile = logfile, verbose = verbose) + + # -------- Step 2 - Get P Values for each analyte of each station -------- # Write to log @@ -191,6 +206,12 @@ LRM <- function(chemdata.lrm.input, preprocessed = F, logfile = file.path(getwd( + # Log the separation space between steps + writelog("\n___\n___\n___\n____" , logfile = logfile, verbose = verbose) + + + + # -------- Step 3 - Get Max P Value -------- writelog("\n#### Step 3", logfile = logfile, verbose = verbose) writelog("\n##### Group by StationID (this function and R package overall assumes you are feeding it data for one sediment sample per stationid)", logfile = logfile, verbose = verbose) @@ -233,6 +254,102 @@ LRM <- function(chemdata.lrm.input, preprocessed = F, logfile = file.path(getwd( + # Log the separation space between steps + writelog("\n___\n___\n___" , logfile = logfile, verbose = verbose) + + + + + # ---- Main Intermediate CA LRM Calculation QA Step ---- + + writelog("\n#### *Main Intermediate CA LRM Calculation QA Step*\n", logfile = logfile, verbose = verbose) + writelog("\n- Just basically selecting certain columns, and 'R Binding' the pmax value as 'SAMPLE_PMAX' for the whole station\n", logfile = logfile, verbose = verbose) + writelog("\n**The ouput from this intermediate calculation step is designed to make an easier comparison with output from the Excel Tool**\n", logfile = logfile, verbose = verbose) + + # Actually execute the code + lrm.main.intermediate.qa.calc.step <- chemdata_lrm2 %>% + select( + StationID, + CAP_parameter = AnalyteName, + Value = p + ) %>% + mutate( + CAP_parameter = paste0( + 'p_', + case_when( + CAP_parameter == 'alpha-Chlordane' ~ 'CHLORDAN_A', + CAP_parameter == 'trans-Nonachlor' ~ 'NONACHL_TR', + CAP_parameter == 'PCBs_total' ~ 'PCB_SUM', + CAP_parameter == "4,4'-DDT" ~ "4,4'_DDT", + TRUE ~ toupper(CAP_parameter) + ) + ) + ) %>% + rbind( + chemdata_lrm3 %>% + mutate( + CAP_parameter = 'SAMPLE_PMAX' + ) %>% + select( + StationID, + CAP_parameter, + Value = Score + ) + ) %>% + arrange(StationID, CAP_parameter) + + # Write code portion of the logs + writelog( + "", + code = " + lrm.main.intermediate.qa.calc.step <- chemdata_lrm2 %>% + select( + StationID, + CAP_parameter = AnalyteName, + Value = p + ) %>% + mutate( + CAP_parameter = paste0( + 'p_', + case_when( + CAP_parameter == 'alpha-Chlordane' ~ 'CHLORDAN_A', + CAP_parameter == 'trans-Nonachlor' ~ 'NONACHL_TR', + CAP_parameter == 'PCBs_total' ~ 'PCB_SUM', + CAP_parameter == \"4,4'-DDT\" ~ \"4,4'_DDT\", + TRUE ~ toupper(CAP_parameter) + ) + ) + ) %>% + rbind( + chemdata_lrm3 %>% + mutate( + CAP_parameter = 'SAMPLE_PMAX' + ) %>% + select( + StationID, + CAP_parameter, + Value = Score + ) + ) %>% + arrange(StationID, CAP_parameter) + ", + data = lrm.main.intermediate.qa.calc.step, + logfile = logfile, + verbose = verbose + ) + # Serve it up for download + create_download_link(data = lrm.main.intermediate.qa.calc.step, logfile = logfile, filename = 'LRM_IntermediateCalcQA.csv', linktext = 'Download Main LRM Intermediate Calculation QA Data', verbose = verbose) + + + + + + # Log the separation space between steps + writelog("\n___\n___\n___\n____" , logfile = logfile, verbose = verbose) + + + + # -------- Step 4 - Assign the category and category score -------- @@ -304,6 +421,8 @@ LRM <- function(chemdata.lrm.input, preprocessed = F, logfile = file.path(getwd( # Serve it up for download create_download_link(data = chemdata_lrm.final, logfile = logfile, filename = 'LRM_Final.csv', linktext = 'Download Final LRM Data (Within LRM Function)', verbose = verbose) + # Log the separation space between steps + writelog("\n___\n___\n___" , logfile = logfile, verbose = verbose) writelog("##### END Chem LRM Function\n", logfile = logfile, verbose = verbose) @@ -379,6 +498,9 @@ CSI <- function(chemdata.csi.input, preprocessed = F, logfile = file.path(getwd( create_download_link(data = chemdata.csi.input, logfile = logfile, filename = 'CSI_InitialInputData.csv', linktext = 'Download Initial Input to Chem CSI Function', verbose = verbose) + # Log the separation space between steps + writelog("\n___\n___\n___" , logfile = logfile, verbose = verbose) + # ---- Make the call to the Preprocessing function (If not already preprocessed) ---- if (!preprocessed) { @@ -389,9 +511,17 @@ CSI <- function(chemdata.csi.input, preprocessed = F, logfile = file.path(getwd( verbose = verbose ) + # Log the separation space between steps + writelog("\n___\n___\n___\n___" , logfile = logfile, verbose = verbose) + # Actually call the function chemdata.csi.input <- chemdata_prep(chemdata.csi.input, logfile = logfile, verbose = verbose) + + # Log the separation space between steps + writelog("\n___\n___\n___\n___" , logfile = logfile, verbose = verbose) + + # Create code block and download link to the preprocessed data writelog( "\n#### Chemdata Pre processing function is finished executing - Here is its final output along with a code block (for R Studio users):", @@ -419,6 +549,9 @@ CSI <- function(chemdata.csi.input, preprocessed = F, logfile = file.path(getwd( pageLength = 12 ) + # Log the separation space between steps + writelog("\n___\n___\n___" , logfile = logfile, verbose = verbose) + # Here we begin to manipulate the CSI chem data per Technical Manual page 40-42 writelog("\n### Manipulate the CSI chem data per Technical Manual page 40-42\n", logfile = logfile, verbose = verbose) @@ -449,6 +582,8 @@ CSI <- function(chemdata.csi.input, preprocessed = F, logfile = file.path(getwd( create_download_link(data = chemdata_csi, logfile = logfile, filename = 'CSI_Step0.csv', linktext = 'Download CSI Step0', verbose = verbose) + # Log the separation space between steps + writelog("\n___\n___\n___" , logfile = logfile, verbose = verbose) # ---- CSI Calclulation Step Description ---- @@ -475,7 +610,8 @@ CSI <- function(chemdata.csi.input, preprocessed = F, logfile = file.path(getwd( - + # Log the separation space between steps + writelog("\n___\n___\n___" , logfile = logfile, verbose = verbose) @@ -537,6 +673,10 @@ CSI <- function(chemdata.csi.input, preprocessed = F, logfile = file.path(getwd( create_download_link(data = chemdata_csi1, logfile = logfile, filename = 'CSI_Step1.csv', linktext = 'Download CSI Step1', verbose = verbose) + # Log the separation space between steps + writelog("\n___\n___\n___" , logfile = logfile, verbose = verbose) + + # ---- Calculate CSI Step 2 ---- # Write to the log file @@ -581,6 +721,104 @@ CSI <- function(chemdata.csi.input, preprocessed = F, logfile = file.path(getwd( create_download_link(data = chemdata_csi2, logfile = logfile, filename = 'CSI_Step2.csv', linktext = 'Download CSI Step2', verbose = verbose) + # Log the separation space between steps + writelog("\n___\n___\n___\n____" , logfile = logfile, verbose = verbose) + + + + + # ---- Main Intermediate CSI Calculation QA Step ---- + + writelog("\n#### *Main Intermediate CSI Calculation QA Step*\n", logfile = logfile, verbose = verbose) + writelog("\n- Join output from Step 1 and 2\n", logfile = logfile, verbose = verbose) + writelog("\n- (step1 %>% left_join step2)\n", logfile = logfile, verbose = verbose) + writelog("\n**The ouput from this intermediate calculation step is designed to make an easier comparison with output from the Excel Tool**\n", logfile = logfile, verbose = verbose) + + # Actually execute the code + csi.main.intermediate.qa.calc.step <- chemdata_csi1 %>% + select( + StationID, + Chemical = AnalyteName, + Weight = weight, + Category = exposure_score, + CCS = weighted_score + ) %>% + left_join( + chemdata_csi2 %>% + select( + StationID, + `Sum CCS` = weighted_score_sum, + `Sum Weight` = weight, + `Weighted Mean` = Score + ) + , + by = 'StationID' + ) %>% + mutate( + Chemical = case_when( + Chemical == 'alpha-Chlordane' ~ 'Alpha Chlordane', + Chemical == 'gamma-Chlordane' ~ 'Gamma Chlordane', + Chemical == 'DDDs_total' ~ 'DDDs, total', + Chemical == 'DDEs_total' ~ 'DDEs, total', + Chemical == 'DDTs_total' ~ 'DDTs, total', + Chemical == 'PCBs_total' ~ 'PCBs, total', + TRUE ~ Chemical + ) + ) %>% + arrange(StationID, Chemical) + + # Write code portion of the logs + writelog( + "", + code = ' + csi.main.intermediate.qa.calc.step <- chemdata_csi1 %>% + select( + StationID, + Chemical = AnalyteName, + Weight = weight, + Category = exposure_score, + CCS = weighted_score + ) %>% + left_join( + chemdata_csi2 %>% + select( + StationID, + `Sum CCS` = weighted_score_sum, + `Sum Weight` = weight, + `Weighted Mean` = Score + ) + , + by = \'StationID\' + ) %>% + mutate( + Chemical = case_when( + Chemical == \'alpha-Chlordane\' ~ \'Alpha Chlordane\', + Chemical == \'gamma-Chlordane\' ~ \'Gamma Chlordane\', + Chemical == \'DDDs_total\' ~ \'DDDs, total\', + Chemical == \'DDEs_total\' ~ \'DDEs, total\', + Chemical == \'DDTs_total\' ~ \'DDTs, total\', + Chemical == \'PCBs_total\' ~ \'PCBs, total\', + TRUE ~ Chemical + ) + ) %>% + arrange(StationID, Chemical) + ', + data = csi.main.intermediate.qa.calc.step, + logfile = logfile, + verbose = verbose + ) + # Serve it up for download + create_download_link(data = csi.main.intermediate.qa.calc.step, logfile = logfile, filename = 'CSI_IntermediateCalcQA.csv', linktext = 'Download Main CSI Intermediate Calculation QA Data', verbose = verbose) + + + + + # Log the separation space between steps + writelog("\n___\n___\n___\n____" , logfile = logfile, verbose = verbose) + + + + # ---- Calculate CSI Step 3 ---- # write to the logs writelog("\n##### Execute Step 3:\n", logfile = logfile, verbose = verbose) @@ -690,14 +928,13 @@ CSI <- function(chemdata.csi.input, preprocessed = F, logfile = file.path(getwd( #' chem.sqo(chem_sampledata) # get scores and see output #' #' @export -chem.sqo <- function(chemdata, logfile = file.path(getwd(), 'logs', format(Sys.time(), "%Y-%m-%d_%H:%M:%S"), 'chemlog.Rmd' ), verbose = T) { +chem.sqo <- function(chemdata, logfile = file.path(getwd(), 'logs', format(Sys.time(), "%Y-%m-%d_%H:%M:%S"), 'chemlog.Rmd' ), verbose = T, logtitle = 'Chemistry SQO Logs') { # ---- Initialize Logging ---- - init.log(logfile, base.func.name = sys.call(), current.time = Sys.time(), is.base.func = length(sys.calls()) == 1, verbose = verbose) + init.log(logfile, base.func.name = sys.call(), current.time = Sys.time(), is.base.func = length(sys.calls()) == 1, verbose = verbose, title = logtitle) hyphen.log.prefix <- rep('-', (2 * (length(sys.calls))) - 1) - writelog("\n# Chemistry SQO Logs\n", logfile = logfile, verbose = verbose) - writelog("\n## Chemistry SQO Main Function\n", logfile = logfile, verbose = verbose) + writelog("\n# Chemistry SQO Main Function\n", logfile = logfile, verbose = verbose) # ---- Save the raw input to an RData file (for the sake of those who want the auditing logs) ---- @@ -708,7 +945,7 @@ chem.sqo <- function(chemdata, logfile = file.path(getwd(), 'logs', format(Sys.t # Display raw input data, create a download link for the knitted final RMarkdown output writelog( - "\n### Raw input to chem.sqo:", + "\n## Raw input to chem.sqo:", logfile = logfile, code = paste0("load('", rawinput.filename, "') ### This will load a dataframe called 'chemdata' into your environment"), verbose = verbose @@ -720,17 +957,21 @@ chem.sqo <- function(chemdata, logfile = file.path(getwd(), 'logs', format(Sys.t # ---- Make the call to the Preprocessing function ---- # Write to the log file writelog( - "\n### Preprocessing chemistry data (Details within chemdata_prep to be shown later as well):\n", + "\n## Preprocessing chemistry data (Details within chemdata_prep to be shown later as well):\n", logfile = logfile, verbose = verbose ) + # Log the separation space between steps + writelog("\n___\n___\n___\n___" , logfile = logfile, verbose = verbose) + + # Actually call the function chemdata <- chemdata_prep(chemdata, logfile = logfile, verbose = verbose) # Create code block and download link to the preprocessed data writelog( - "\n### Chemdata Pre processing function is finished executing - Here is its final output along with a code block (for R Studio users):", + "\n## Chemdata Pre processing function is finished executing - Here is its final output along with a code block (for R Studio users):", logfile = logfile, code = 'chemdata <- chemdata_prep(chemdata, verbose = FALSE)', data = chemdata, @@ -738,27 +979,34 @@ chem.sqo <- function(chemdata, logfile = file.path(getwd(), 'logs', format(Sys.t ) create_download_link(data = chemdata, logfile = logfile, filename = 'ChemSQO-PreProcessedInput.csv', linktext = 'Download Preprocessed Input to Chem SQO Function (after calling chemdata_prep)', verbose = verbose) + # Log the separation space between steps + writelog("\n___\n___\n___\n___" , logfile = logfile, verbose = verbose) # ---- Make the call to the LRM function ---- # Write to the log file writelog( - "\n### Call LRM function within chem.sqo....\n", + "\n## Call LRM function within chem.sqo....\n", logfile = logfile, verbose = verbose ) + # Log the separation space between steps + writelog("\n___\n___\n___\n___" , logfile = logfile, verbose = verbose) # Actually call it chemdata_lrm <- LRM(chemdata, preprocessed = T, logfile = logfile, verbose = verbose) + # Log the separation space between steps + writelog("\n___\n___\n___\n___" , logfile = logfile, verbose = verbose) + # Create code block and download link writelog( - "\n### LRM Function finished executing", + "\n## LRM Function finished executing", logfile = logfile, verbose = verbose ) writelog( - "##### Here is its final output along with a code block (for R Studio users):", + "#### Here is its final output along with a code block (for R Studio users):", logfile = logfile, code = 'chemdata_lrm <- LRM(chemdata, preprocessed = TRUE, verbose = FALSE)', data = chemdata_lrm, @@ -771,22 +1019,28 @@ chem.sqo <- function(chemdata, logfile = file.path(getwd(), 'logs', format(Sys.t # ---- Make the call to the CSI function ---- # Write to the log file writelog( - "\n### Call CSI function within chem.sqo\n", + "\n## Call CSI function within chem.sqo\n", logfile = logfile, verbose = verbose ) + # Log the separation space between steps + writelog("\n___\n___\n___\n___" , logfile = logfile, verbose = verbose) + # Actually call it chemdata_csi <- CSI(chemdata, preprocessed = T, logfile = logfile, verbose = verbose) + # Log the separation space between steps + writelog("\n___\n___\n___\n___" , logfile = logfile, verbose = verbose) + # Create code block and download link writelog( - "\n### CSI Function finished executing", + "\n## CSI Function finished executing", logfile = logfile, verbose = verbose ) writelog( - "##### Here is its final output along with a code block (for R Studio users):", + "\n### Here is its final output along with a code block (for R Studio users):", logfile = logfile, code = 'chemdata_csi <- CSI(chemdata, preprocessed = TRUE, verbose = FALSE)', data = chemdata_csi, @@ -795,7 +1049,8 @@ chem.sqo <- function(chemdata, logfile = file.path(getwd(), 'logs', format(Sys.t create_download_link(data = chemdata_csi, logfile = logfile, filename = 'ChemSQO-CSI-Output.csv', linktext = 'Download Final CSI Output', verbose = verbose) - + # Log the separation space between steps + writelog("\n___\n___\n___\n___" , logfile = logfile, verbose = verbose) # ---- Subsequent steps after CSI and LRM ---- @@ -814,7 +1069,7 @@ chem.sqo <- function(chemdata, logfile = file.path(getwd(), 'logs', format(Sys.t # Write to the logs and make the code block and datatable display writelog( - "\n#### First Subsequent Step: RBind the CSI and LRM Dataframes", + "\n### First Subsequent Step: RBind the CSI and LRM Dataframes", logfile = logfile, code = ' combined1 <- rbind(chemdata_lrm, chemdata_csi) %>% @@ -828,7 +1083,8 @@ chem.sqo <- function(chemdata, logfile = file.path(getwd(), 'logs', format(Sys.t create_download_link(data = combined1, logfile = logfile, filename = 'CSI_LRM_Combine_step1.csv', linktext = 'Download CSI_LRM_Combine_step1.csv', verbose = verbose) - + # Log the separation space between steps + writelog("\n___\n___\n___" , logfile = logfile, verbose = verbose) # ---- Second Subsequent Step: Group by StationID and take the average of CSI and LRM ---- @@ -852,7 +1108,7 @@ chem.sqo <- function(chemdata, logfile = file.path(getwd(), 'logs', format(Sys.t # Write to the log writelog( - "\n#### Second Subsequent Step: Group by StationID and take the average of CSI and LRM", + "\n### Second Subsequent Step: Group by StationID and take the average of CSI and LRM", logfile = logfile, code = ' combined2 <- combined1 %>% @@ -879,7 +1135,8 @@ chem.sqo <- function(chemdata, logfile = file.path(getwd(), 'logs', format(Sys.t create_download_link(data = combined2, logfile = logfile, filename = 'CSI_LRM_Combine_step2.csv', linktext = 'Download CSI_LRM_Combine_step2.csv', verbose = verbose) - + # Log the separation space between steps + writelog("\n___\n___\n___" , logfile = logfile, verbose = verbose) # ---- Third Subsequent Step: Convert numeric score to the human readable category ---- @@ -902,7 +1159,7 @@ chem.sqo <- function(chemdata, logfile = file.path(getwd(), 'logs', format(Sys.t # Write to the log writelog( - "\n#### Third Subsequent Step: Convert numeric score to the human readable category", + "\n### Third Subsequent Step: Convert numeric score to the human readable category", logfile = logfile, code = ' combined3 <- combined2 %>% @@ -927,7 +1184,8 @@ chem.sqo <- function(chemdata, logfile = file.path(getwd(), 'logs', format(Sys.t # Make the download link create_download_link(data = combined3, logfile = logfile, filename = 'CSI_LRM_Combine_step3.csv', linktext = 'Download CSI_LRM_Combine_step3.csv', verbose = verbose) - + # Log the separation space between steps + writelog("\n___\n___\n___" , logfile = logfile, verbose = verbose) # ---- Fourth Subsequent Step: Combine CSI, LRM, and Integrated Score dataframes, convert factors to characters, and order it by StationID ---- combined.final <- combined3 %>% @@ -939,7 +1197,7 @@ chem.sqo <- function(chemdata, logfile = file.path(getwd(), 'logs', format(Sys.t # Write to the log writelog( - "\n#### Fourth Subsequent Step: Combine CSI, LRM, and Integrated Score dataframes, convert factors to characters, and order it by StationID", + "\n### Fourth Subsequent Step: Combine CSI, LRM, and Integrated Score dataframes, convert factors to characters, and order it by StationID", logfile = logfile, code = ' combined.final <- combined3 %>% @@ -954,7 +1212,12 @@ chem.sqo <- function(chemdata, logfile = file.path(getwd(), 'logs', format(Sys.t ) # Make the download link - create_download_link(data = combined.final, logfile = logfile, filename = 'CSI_LRM_Combine_step3.csv', linktext = 'Download CSI_LRM_Combine_step3.csv', verbose = verbose) + create_download_link(data = combined.final, logfile = logfile, filename = 'FinalChemSQOScores.csv', linktext = 'Download FinalChemSQOScores.csv', verbose = verbose) + + + # Log the separation space between steps + writelog("\n___\n___\n___" , logfile = logfile, verbose = verbose) + writelog("\nEND Chem SQO Function\n", logfile = logfile, verbose = verbose) @@ -976,7 +1239,7 @@ chem.sqo <- function(chemdata, logfile = file.path(getwd(), 'logs', format(Sys.t #' #' @usage chemdata_prep(chemdata) #' -#' @param chemdata a dataframe with the following columns: +#' @param chemdata_prep.input a dataframe with the following columns: #' #' \code{StationID}, #' @@ -1012,17 +1275,16 @@ chemdata_prep <- function(chemdata_prep.input, logfile = file.path(getwd(), 'log "\n### Initial input to chemdata_prep:", logfile = logfile, code = paste0("load('", rawinput.filename, "')"), - data = chemdata_prep.input, + data = chemdata_prep.input %>% head(15), verbose = verbose ) create_download_link(data = chemdata_prep.input, logfile = logfile, filename = 'chemdata_prep.input.csv', linktext = 'Download Initial Input to Chem Preprocessing Function', verbose = verbose) - # Here chemdata consists of data in the same format as our database, with the columns - # stationid, analytename, result, rl, mdl - + # Log the separation space between steps + writelog("\n___\n___\n___\n___" , logfile = logfile, verbose = verbose) @@ -1037,7 +1299,7 @@ chemdata_prep <- function(chemdata_prep.input, logfile = file.path(getwd(), 'log "", logfile = logfile, code = 'names(chemdata_prep.input) <- names(chemdata_prep.input) %>% tolower()', - data = chemdata_prep.input, + data = chemdata_prep.input %>% head(15), verbose = verbose, pageLength = 5 ) @@ -1045,6 +1307,8 @@ chemdata_prep <- function(chemdata_prep.input, logfile = file.path(getwd(), 'log + # Log the separation space between steps + writelog("\n___\n___\n___\n___ " , logfile = logfile, verbose = verbose) @@ -1074,7 +1338,7 @@ chemdata_prep <- function(chemdata_prep.input, logfile = file.path(getwd(), 'log chemdata_prep.input <- chemdata_prep.input %>% rename(fieldrep = fielddup) } ", - data = chemdata_prep.input, + data = chemdata_prep.input %>% head(15), verbose = verbose, pageLength = 5 ) @@ -1083,7 +1347,8 @@ chemdata_prep <- function(chemdata_prep.input, logfile = file.path(getwd(), 'log - + # Log the separation space between steps + writelog("\n___\n___\n___\n___ " , logfile = logfile, verbose = verbose) @@ -1104,7 +1369,7 @@ chemdata_prep <- function(chemdata_prep.input, logfile = file.path(getwd(), 'log chemdata_prep.input <- chemdata_prep.input %>% rename(labrep = labreplicate) } ", - data = chemdata_prep.input, + data = chemdata_prep.input %>% head(15), logfile = logfile, verbose = verbose, pageLength = 5 @@ -1115,7 +1380,8 @@ chemdata_prep <- function(chemdata_prep.input, logfile = file.path(getwd(), 'log - + # Log the separation space between steps + writelog("\n___\n___\n___\n___ " , logfile = logfile, verbose = verbose) @@ -1171,7 +1437,7 @@ chemdata_prep <- function(chemdata_prep.input, logfile = file.path(getwd(), 'log writelog(msg, logfile = logfile, verbose = verbose) } ", - data = chemdata_prep.input, + data = chemdata_prep.input %>% head(15), logfile = logfile, verbose = verbose, pageLength = 5 @@ -1182,7 +1448,8 @@ chemdata_prep <- function(chemdata_prep.input, logfile = file.path(getwd(), 'log - + # Log the separation space between steps + writelog("\n___\n___\n___\n___ " , logfile = logfile, verbose = verbose) @@ -1224,7 +1491,7 @@ chemdata_prep <- function(chemdata_prep.input, logfile = file.path(getwd(), 'log warning(msg) } ', - data = chemdata_prep.input, + data = chemdata_prep.input %>% head(15), logfile = logfile, verbose = verbose, pageLength = 5 @@ -1234,6 +1501,11 @@ chemdata_prep <- function(chemdata_prep.input, logfile = file.path(getwd(), 'log + # Log the separation space between steps + writelog("\n___\n___\n___\n___ " , logfile = logfile, verbose = verbose) + + + # ---- Step 6: Check if the resulting chemdata_prep.input is empty after filtering ---- writelog( "\n#### Step 6: Check if the resulting chemdata_prep.input is empty after filtering", @@ -1252,7 +1524,7 @@ chemdata_prep <- function(chemdata_prep.input, logfile = file.path(getwd(), 'log stop("In chemdata_prep - chemistry input data is empty after filtering sampletypecode == Result and labrep == 1 and fieldrep == 1") } ', - data = chemdata_prep.input, + data = chemdata_prep.input %>% head(15), logfile = logfile, verbose = verbose, pageLength = 5 @@ -1262,6 +1534,10 @@ chemdata_prep <- function(chemdata_prep.input, logfile = file.path(getwd(), 'log + # Log the separation space between steps + writelog("\n___\n___\n___\n___ " , logfile = logfile, verbose = verbose) + + # ---- Step 7: Drop duplicates ---- writelog( @@ -1287,7 +1563,7 @@ chemdata_prep <- function(chemdata_prep.input, logfile = file.path(getwd(), 'log writelog( "\n##### Chemdata Prep Input Data with Duplicates Removed:", code = 'chemdata_prep.input <- chemdata_prep.input[!(chemdata_prep.input %>% select(stationid, analytename, sampletypecode, labrep, fieldrep) %>% duplicated), ]', - data = chemdata_prep.input, + data = chemdata_prep.input %>% head(15), logfile = logfile, verbose = verbose, pageLength = 5 @@ -1298,6 +1574,10 @@ chemdata_prep <- function(chemdata_prep.input, logfile = file.path(getwd(), 'log + # Log the separation space between steps + writelog("\n___\n___\n___\n___ " , logfile = logfile, verbose = verbose) + + # ---- Step 8: Convert Result, RL, MDL to numeric fields ---- writelog("\n#### Convert Result, RL, MDL to numeric fields", logfile = logfile, verbose = verbose) @@ -1323,7 +1603,7 @@ chemdata_prep <- function(chemdata_prep.input, logfile = file.path(getwd(), 'log ) %>% filter(!is.na(stationid)) ', - data = chemdata_prep.input, + data = chemdata_prep.input %>% head(15), logfile = logfile, verbose = verbose, pageLength = 5 @@ -1334,7 +1614,8 @@ chemdata_prep <- function(chemdata_prep.input, logfile = file.path(getwd(), 'log - + # Log the separation space between steps + writelog("\n___\n___\n___\n___ " , logfile = logfile, verbose = verbose) @@ -1455,26 +1736,8 @@ chemdata_prep <- function(chemdata_prep.input, logfile = file.path(getwd(), 'log - - - - - writelog("*** DATA *** Before filtering, grouping, converting missing result values - chemdata-preprocessing-step5.csv",logfile = logfile,verbose = verbose,prefix = hyphen.log.prefix) - writelog(chemdata_prep.input, file.path(dirname(logfile), 'chemdata-preprocessing-step5.csv'), filetype = 'csv', verbose = verbose) - - writelog("filtering chemistry data to necessary analytes - etc", logfile = logfile, verbose = verbose) - writelog("from step 5 to 6 this is what is performed:", logfile = logfile, verbose = verbose) - writelog("Create a new column called compound which represents the group of the analyte (High/Low PAH, Total PCB, DDD, DDE. (DDTs not handled in this step)", logfile = logfile, verbose = verbose, prefix = paste0('-',hyphen.log.prefix)) - writelog("remove records where compound is NA", logfile = logfile, verbose = verbose, prefix = paste0('-',hyphen.log.prefix)) - writelog("Handle missing result values according to guidance of pages 30 and 31 of Technical manual", logfile = logfile, verbose = verbose, prefix = paste0('-',hyphen.log.prefix)) - writelog("-- (half MDL for the analytes that are not part of a group, otherwise set to zero in the grouped analytes which get summed)", logfile = logfile, verbose = verbose, prefix = paste0('-',hyphen.log.prefix)) - writelog("Multiply PCBs by 1.72", logfile = logfile, verbose = verbose, prefix = paste0('-',hyphen.log.prefix)) - writelog("Sum results of grouped constituents/analytes", logfile = logfile, verbose = verbose, prefix = paste0('-',hyphen.log.prefix)) - writelog("If all values in the group are non detects - take the max RL", logfile = logfile, verbose = verbose, prefix = paste0('-',hyphen.log.prefix)) - - - - + # Log the separation space between steps + writelog("\n___\n___\n___\n___ " , logfile = logfile, verbose = verbose) @@ -1486,7 +1749,7 @@ chemdata_prep <- function(chemdata_prep.input, logfile = file.path(getwd(), 'log ) # Actually execute the code - chemdata <- chemdata_prep.input %>% + chemdata.filtered.no.DDT <- chemdata_prep.input %>% # create a new column called compound. This is what we will group by, mutate( compound = case_when( @@ -1513,7 +1776,7 @@ chemdata_prep <- function(chemdata_prep.input, logfile = file.path(getwd(), 'log writelog( "", code = ' - chemdata <- chemdata_prep.input %>% + chemdata.filtered.no.DDT <- chemdata_prep.input %>% # create a new column called compound. This is what we will group by, mutate( compound = case_when( @@ -1536,15 +1799,17 @@ chemdata_prep <- function(chemdata_prep.input, logfile = file.path(getwd(), 'log !is.na(compound) ) ', - data = chemdata, + data = chemdata.filtered.no.DDT %>% head(15), logfile = logfile, verbose = verbose ) # Serve it up for download - create_download_link(chemdata, logfile = logfile, filename = 'chemdata_prep.input.step9.no.ddt.csv', verbose = verbose, linktext = 'Chem Preprocessed Data Filtered (No DDT)') + create_download_link(chemdata.filtered.no.DDT, logfile = logfile, filename = 'chemdata_prep.input.step9.no.ddt.csv', verbose = verbose, linktext = 'Chem Preprocessed Data Filtered (No DDT)') + # Log the separation space between steps + writelog("\n___\n___\n___\n___ " , logfile = logfile, verbose = verbose) @@ -1564,11 +1829,11 @@ chemdata_prep <- function(chemdata_prep.input, logfile = file.path(getwd(), 'log acceptable_units_ppb <- c('ng/g', 'ng/g dw', 'ppb') # Check if the units column exists - if ('units' %in% names(chemdata)) { + if ('units' %in% names(chemdata.filtered.no.DDT)) { # Check for rows that do not meet the criteria - incorrect_rows <- chemdata[ - (chemdata$analytename %in% ppm_analytes & !(chemdata$units %in% acceptable_units_ppm)) | - (!chemdata$analytename %in% ppm_analytes & !(chemdata$units %in% acceptable_units_ppb)), ] + incorrect_rows <- chemdata.filtered.no.DDT[ + (chemdata.filtered.no.DDT$analytename %in% ppm_analytes & !(chemdata.filtered.no.DDT$units %in% acceptable_units_ppm)) | + (!chemdata.filtered.no.DDT$analytename %in% ppm_analytes & !(chemdata.filtered.no.DDT$units %in% acceptable_units_ppb)), ] # If there are incorrect rows, stop and display a message if (nrow(incorrect_rows) > 0) { @@ -1598,11 +1863,11 @@ chemdata_prep <- function(chemdata_prep.input, logfile = file.path(getwd(), 'log acceptable_units_ppb <- c(\'ng/g\', \'ng/g dw\', \'ppb\') # Check if the units column exists - if (\'units\' %in% names(chemdata)) { + if (\'units\' %in% names(chemdata.filtered.no.DDT)) { # Check for rows that do not meet the criteria - incorrect_rows <- chemdata[ - (chemdata$analytename %in% ppm_analytes & !(chemdata$units %in% acceptable_units_ppm)) | - (!chemdata$analytename %in% ppm_analytes & !(chemdata$units %in% acceptable_units_ppb)), ] + incorrect_rows <- chemdata.filtered.no.DDT[ + (chemdata.filtered.no.DDT$analytename %in% ppm_analytes & !(chemdata.filtered.no.DDT$units %in% acceptable_units_ppm)) | + (!chemdata.filtered.no.DDT$analytename %in% ppm_analytes & !(chemdata.filtered.no.DDT$units %in% acceptable_units_ppb)), ] # If there are incorrect rows, stop and display a message if (nrow(incorrect_rows) > 0) { @@ -1624,11 +1889,16 @@ chemdata_prep <- function(chemdata_prep.input, logfile = file.path(getwd(), 'log + # Log the separation space between steps + writelog("\n___\n___\n___\n___ " , logfile = logfile, verbose = verbose) + + + # ---- Step 10: Deal with missing values/Non Detects ---- writelog("\n#### Step 10: Deal with missing values/Non Detects",logfile = logfile,verbose = verbose) writelog("\n##### Dealing with missing result values (non detects)",logfile = logfile,verbose = verbose) writelog("\n##### NA or negative result values are treated as missing values (covers -88, -99 or actual null values)",logfile = logfile,verbose = verbose) - chemdata <- chemdata %>% + chemdata.step10 <- chemdata.filtered.no.DDT %>% mutate( result = case_when( # This is for dealing with Non detects (Page 37 of SQO Manual, Paragraph titled Data Preparation) @@ -1650,7 +1920,7 @@ chemdata_prep <- function(chemdata_prep.input, logfile = file.path(getwd(), 'log writelog( "", code = ' - chemdata <- chemdata %>% + chemdata.step10 <- chemdata.filtered.no.DDT %>% mutate( result = case_when( # This is for dealing with Non detects (Page 37 of SQO Manual, Paragraph titled Data Preparation) @@ -1669,19 +1939,25 @@ chemdata_prep <- function(chemdata_prep.input, logfile = file.path(getwd(), 'log ) ) ', - data = chemdata, + data = chemdata.step10 %>% head(15), logfile = logfile, verbose = verbose ) # Serve it up for download - create_download_link(chemdata, logfile = logfile, filename = 'chemdata_prep.input.step10.no.ddt.csv', verbose = verbose, linktext = 'Chem Preprocessed Data Step 10 (Dealing with non detects)') + create_download_link(chemdata.step10, logfile = logfile, filename = 'chemdata_prep.input.step10.no.ddt.csv', verbose = verbose, linktext = 'Chem Preprocessed Data Step 10 (Dealing with non detects)') + + + + # Log the separation space between steps + writelog("\n___\n___\n___\n___ " , logfile = logfile, verbose = verbose) + # ---- Step 11: Multiply PCB Values by 1.72 ---- writelog("\n#### Step 11: Multiply PCB Values by 1.72", logfile = logfile, verbose = verbose) # Actually execute the code - chemdata <- chemdata %>% + chemdata.step11 <- chemdata.step10 %>% mutate( result = if_else( # CASQO manual page 36 (3rd edition June 2021), below table 3.4 - PCB result value gets multiplied by 1.72 @@ -1694,7 +1970,7 @@ chemdata_prep <- function(chemdata_prep.input, logfile = file.path(getwd(), 'log writelog( "", code = ' - chemdata <- chemdata %>% + chemdata.step11 <- chemdata.step10 %>% mutate( result = if_else( # CASQO manual page 36 (3rd edition June 2021), below table 3.4 - PCB result value gets multiplied by 1.72 @@ -1703,25 +1979,25 @@ chemdata_prep <- function(chemdata_prep.input, logfile = file.path(getwd(), 'log ) ) ', - data = chemdata, + data = chemdata.step11 %>% head(15), logfile = logfile, verbose = verbose ) # Serve it up for download - create_download_link(chemdata, logfile = logfile, filename = 'chemdata_prep.input.step11.pcb.mult.no.ddt.csv', verbose = verbose, linktext = 'Chem Preprocessed Data Step 11 (Multiply PCBs by 1.72)') - - + create_download_link(chemdata.step11, logfile = logfile, filename = 'chemdata_prep.input.step11.pcb.mult.no.ddt.csv', verbose = verbose, linktext = 'Chem Preprocessed Data Step 11 (Multiply PCBs by 1.72)') + # Log the separation space between steps + writelog("\n___\n___\n___\n___ " , logfile = logfile, verbose = verbose) # ---- Step 12: If all are non detects in the group, assign it the max of the RLs ---- - writelog("Step 12: If all are non detects in the group, assign it the max of the RLs", logfile = logfile, verbose = verbose) + writelog("\n#### Step 12: If all are non detects in the group, assign it the max of the RLs", logfile = logfile, verbose = verbose) # Actually execute the code - chemdata <- chemdata %>% + chemdata.step12 <- chemdata.step11 %>% group_by( stationid, compound ) %>% @@ -1739,7 +2015,7 @@ chemdata_prep <- function(chemdata_prep.input, logfile = file.path(getwd(), 'log writelog( "\n##### Group by stationid and compound and sum the result values - if all are missing take the Max RL (Page 36 from Technical Manual)", code = ' - chemdata <- chemdata %>% + chemdata.step12 <- chemdata.step11 %>% group_by( stationid, compound ) %>% @@ -1753,25 +2029,28 @@ chemdata_prep <- function(chemdata_prep.input, logfile = file.path(getwd(), 'log ) %>% ungroup() ', - data = chemdata, + data = chemdata.step12 %>% head(15), logfile = logfile, verbose = verbose ) # Serve it up for download - create_download_link(chemdata, logfile = logfile, filename = 'chemdata_prep.input.step12.no.ddt.csv', verbose = verbose, linktext = 'Chem Preprocessed Data Step 12 (Handle case where all in analytegroup are Non detects)') + create_download_link(chemdata.step12, logfile = logfile, filename = 'chemdata_prep.input.step12.no.ddt.csv', verbose = verbose, linktext = 'Chem Preprocessed Data Step 12 (Handle case where all in analytegroup are Non detects)') + # Log the separation space between steps + writelog("\n___\n___\n___\n___ " , logfile = logfile, verbose = verbose) + # ---- Step 13: Handle DDTs ---- + writelog("\n#### Step 13:Handle DDTs (according to page 36 of SQO technical manual)", logfile = logfile, verbose = verbose) + writelog("\n- 13a. Replace missing values with zero", logfile = logfile, verbose = verbose) + writelog("\n- 13b. Sum them - if all are non-detects then take the max RL", logfile = logfile, verbose = verbose) - - writelog("Handle DDTs (according to page 30 of SQO technical manual", logfile = logfile, verbose = verbose) - writelog("-- replace missing values with zero", logfile = logfile, verbose = verbose) - writelog("-- Sum them - if all are non-detects then take the max RL", logfile = logfile, verbose = verbose) - ddts_total <- chemdata_prep.input %>% + # Code + ddts_total.13a <- chemdata_prep.input %>% filter(grepl("DDT",analytename)) %>% mutate( compound = "DDTs_total", @@ -1781,20 +2060,71 @@ chemdata_prep <- function(chemdata_prep.input, logfile = file.path(getwd(), 'log # "Compounds qualified as non-detected are treated as having a concentration of zero for the purpose of summing" coalesce(result, -88) < 0, 0, result ) - ) %>% + ) + writelog( + "\n##### 13a: Replace non detects with zero before grouping and summing", + code = ' + ddts_total.13a <- chemdata_prep.input %>% + filter(grepl("DDT",analytename)) %>% + mutate( + compound = "DDTs_total", + result = if_else( + # For the summed group of constituents, we get the directions of how to deal with them in page 30 of the SQO Manual + # First paragraph below table 3.4 + # "Compounds qualified as non-detected are treated as having a concentration of zero for the purpose of summing" + coalesce(result, -88) < 0, 0, result + ) + ) + ', + data = ddts_total.13a %>% head(15), + logfile = logfile, + verbose = verbose + ) + # Serve it up for download + create_download_link(ddts_total.13a, logfile = logfile, filename = 'chemdata_prep.input.ddt.13a.csv', verbose = verbose, linktext = 'Chem Preprocessed Data (Filtered to DDTs - before grouping and summing)') + + + + # Code + ddts_total <- ddts_total.13a %>% group_by(stationid,compound) %>% summarize( result = if_else( - # if the sum of the results is zero, assign it the max of the RL's + # if the sum of the results is zero, assign it the max of the RLs # Page 30 of SQO Plan, first paragraph below table 3.4 # "If all components of a sum are non-detected, then the highest reporting limit of any one compound in the group should be used to represent the sum value." sum(result, na.rm = T) != 0, sum(result, na.rm = T), max(rl) ) ) %>% ungroup() - writelog("*** DATA *** chemdata after performing these steps may be found in chemdata-preprocessing-step7 (DDTs).csv", logfile = logfile, verbose = verbose) - writelog(ddts_total, file.path(dirname(logfile), 'chemdata-preprocessing-step7 (DDTs).csv'), filetype = 'csv', verbose = verbose) + writelog( + "\n##### Group by stationid and compound (only one compound DDTs_total) and take the sum", + code = ' + ddts_total <- ddts_total.13a %>% + group_by(stationid,compound) %>% + summarize( + result = if_else( + # if the sum of the results is zero, assign it the max of the RLs + # Page 30 of SQO Plan, first paragraph below table 3.4 + # "If all components of a sum are non-detected, then the highest reporting limit of any one compound in the group should be used to represent the sum value." + sum(result, na.rm = T) != 0, sum(result, na.rm = T), max(rl) + ) + ) %>% + ungroup() + ', + data = ddts_total %>% head(15), + logfile = logfile, + verbose = verbose + ) + # Serve it up for download + create_download_link(ddts_total, logfile = logfile, filename = 'chemdata_prep.input.ddt.csv', verbose = verbose, linktext = 'Chem Preprocessed Data (Filtered to DDTs)') + + + # Log the separation space between steps + writelog("\n___\n___\n___\n___ " , logfile = logfile, verbose = verbose) + + # ---- Step 14: Fix Rounding ---- # Conversation with Darrin on April 16th 2020 # We need to fix the rounding in this thing. # The fixes may also need to be implemented in the chemdata_prep function @@ -1807,16 +2137,46 @@ chemdata_prep <- function(chemdata_prep.input, logfile = file.path(getwd(), 'log # HPAH - 1 # LPAH - 1 # All the rest - 2 - writelog("Combine DDT data with rest of chem data - perform necessary rounding", logfile = logfile, verbose = verbose) - writelog("Copper, Lead, Zinc, High/Low PAHs will be rounded to 1 decimal place; All others will be rounded to 2", logfile = logfile, verbose = verbose) - chemdata <- chemdata %>% + writelog("\n#### Step 14: Combine all analytes and fix rounding", logfile = logfile, verbose = verbose) + writelog("\n##### Copper, Lead, Zinc, High/Low PAHs will be rounded to 1 decimal place; All others will be rounded to 2", logfile = logfile, verbose = verbose) + + # Code + # Step 13 dealt only with DDTs - essentially ddts_total is the step 13 dataframe + + # write to the logs + writelog("\n##### Combine data with DDTs", logfile = logfile, verbose = verbose) + + # Actually perform the code + chemdata.step14a <- chemdata.step12 %>% bind_rows(ddts_total) %>% arrange(stationid, compound) %>% rename( StationID = stationid, AnalyteName = compound, Result = result - ) %>% + ) + + # Write code to the R Markdown file + writelog( + "", + code = ' + chemdata.step14a <- chemdata.step12 %>% + bind_rows(ddts_total) %>% + arrange(stationid, compound) %>% + rename( + StationID = stationid, + AnalyteName = compound, + Result = result + ) + ', + data = chemdata.step14a %>% head(15), + logfile = logfile, + verbose = verbose + ) + + + # Actually perform the rounding + chemdata.preprocessed.final <- chemdata.step14a %>% mutate( Result = case_when( AnalyteName %in% c('Copper','Lead','Zinc','HPAH','LPAH') ~ round(Result, digits = 1), @@ -1824,14 +2184,26 @@ chemdata_prep <- function(chemdata_prep.input, logfile = file.path(getwd(), 'log ) ) - writelog("*** DATA *** final preprocessed chemistry data in chemdata-preprocessed-final.csv", logfile = logfile, verbose = verbose) - writelog(chemdata - , file.path(dirname(logfile), 'chemdata-preprocessed-final.csv'), filetype = 'csv', verbose = verbose) + # Write it to the R Markdown logs + writelog( + "Final Chemdata Prep output:", + code = ' + chemdata.preprocessed.final <- chemdata.step14a %>% + mutate( + Result = case_when( + AnalyteName %in% c(\'Copper\',\'Lead\',\'Zinc\',\'HPAH\',\'LPAH\') ~ round(Result, digits = 1), + TRUE ~ round(Result, digits = 2) + ) + ) + ', + data = chemdata.preprocessed.final %>% head(15), + logfile = logfile, + verbose = verbose + ) - writelog("\nEND Function: chemdata_prep\n", logfile = logfile, verbose = verbose) - writelog("----------------------------------------------------------------------------------------------------\n", logfile = logfile, verbose = verbose) + writelog("\n#### END Function: chemdata_prep\n", logfile = logfile, verbose = verbose) - return(chemdata) + return(chemdata.preprocessed.final) } diff --git a/R/logutils.R b/R/logutils.R index df8693e..5d170b2 100644 --- a/R/logutils.R +++ b/R/logutils.R @@ -97,8 +97,8 @@ init.log <- function(logfile, base.func.name, type = 'text', current.time = Sys. logfile ) } else { - write("---\ntitle: \"R Markdown Log\"\noutput: html_document\n---", file = logfile) - write('```{r echo=FALSE}\n', file = logfile, append = TRUE) + write( paste0("---\ntitle: \"", title ,"\"\noutput: html_document\n---", collapse = ''), file = logfile) + write('```{r setup, include=FALSE, message=FALSE, warning=FALSE}\n', file = logfile, append = TRUE) for (lib in libraries) { write(paste0('library(', lib, ')'), file = logfile, append = TRUE) } diff --git a/data/lrm_table.RData b/data/lrm_table.RData index 29f77306810c0c8e878ba28738b2373abd209730..46481e32d6f45399c6cf69286fde46b69f83f875 100644 GIT binary patch delta 37 tcmcb^e1~~L1^d4vbDDZwVkg#%v3Uk2#cf=-akeER$J?Oq=A}#w3;;Zx4>AA% delta 37 tcmcb^e1~~L1$*7`IZZuBDkj#8v3Ue0#TD+^INOqu!@v5 Date: Fri, 7 Jun 2024 17:11:02 -0700 Subject: [PATCH 04/10] add rounding to CSI input data according to convention Darrin gave --- R/SQO BRI - generic.R | 2 +- R/chemistry.R | 64 +++++++++++++++++++++++++++++++++++++++---- 2 files changed, 59 insertions(+), 7 deletions(-) diff --git a/R/SQO BRI - generic.R b/R/SQO BRI - generic.R index b053ab5..b43f43e 100644 --- a/R/SQO BRI - generic.R +++ b/R/SQO BRI - generic.R @@ -61,7 +61,7 @@ #' @export -BRI <- function(BenthicData) #BenthicData will need to be the species abundances for each sample in the correct format noted above and in support material +BRI_ <- function(BenthicData) #BenthicData will need to be the species abundances for each sample in the correct format noted above and in support material { #loading in packages needed to run function require(tidyverse) diff --git a/R/chemistry.R b/R/chemistry.R index 39244d8..b0983c0 100644 --- a/R/chemistry.R +++ b/R/chemistry.R @@ -586,6 +586,58 @@ CSI <- function(chemdata.csi.input, preprocessed = F, logfile = file.path(getwd( writelog("\n___\n___\n___" , logfile = logfile, verbose = verbose) + + # ---- Step 0.5: Round the result value according to the convention given by Darrin. ---- + + # Write to the log file + writelog("\n#### Round the result value according to the convention.", logfile = logfile, verbose = verbose) + writelog("\nIf the result value is 0.005 or less, we round to 4 decimal places. \n", logfile = logfile, verbose = verbose) + writelog("\nIf the result value is under 10, we round to 2 decimal places. \n", logfile = logfile, verbose = verbose) + writelog("\nIf the result value is under 100, we round to 1 decimal place. \n", logfile = logfile, verbose = verbose) + writelog("\nIf the result value is 100 or more, we round to the nearest integer \n", logfile = logfile, verbose = verbose) + writelog("\nThe weights which we are comparing the results values to were rounded this way, so we are rounding the result values this way as well \n", logfile = logfile, verbose = verbose) + + # Actually execute the code + chemdata_csi <- chemdata_csi %>% + mutate( + Result = case_when( + Result <= 0.005 ~ round(Result, 4), + Result < 10 ~ round(Result, 2), + Result < 100 ~ round(Result, 1), + TRUE ~ round(Result) + ) + ) + + # Write code portion to the log file + writelog( + "", + code = ' + chemdata_csi <- chemdata_csi %>% + mutate( + Result = case_when( + Result <= 0.005 ~ round(Result, 4), + Result < 10 ~ round(Result, 2), + Result < 100 ~ round(Result, 1), + TRUE ~ round(Result) + ) + ) + ', + data = chemdata_csi, + logfile = logfile, + verbose = verbose + ) + + # Serve it up for download + create_download_link(data = chemdata_csi, logfile = logfile, filename = 'CSI_Step0-rounded.csv', linktext = 'Download CSI Step0 (rounded result values)', verbose = verbose) + + + # Log the separation space between steps + writelog("\n___\n___\n___" , logfile = logfile, verbose = verbose) + + + + + # ---- CSI Calclulation Step Description ---- writelog("\n\n#### Calculate CSI\n", logfile = logfile, verbose = verbose) writelog("\n##### Step 1:\n", logfile = logfile, verbose = verbose) @@ -597,12 +649,12 @@ CSI <- function(chemdata.csi.input, preprocessed = F, logfile = file.path(getwd( writelog("- Step 2b: Sum the weighted scores and the weights - then take the sum of the weighted scores divided by the sum of the weights. - This is the CSI Score\n", logfile = logfile, verbose = verbose) writelog("- Step 2c: Assign CSI Categories based on thresholds defined in table 3.9 of Technical Manual page 42 (3rd edition)\n", logfile = logfile, verbose = verbose) table3.9 <- " - | Category | Range | Category Score | - |-------------------|-----------------|----------------| - | Minimal Exposure | < 1.69 | 1 | - | Low Exposure | ≥ 1.69 - ≤ 2.33 | 2 | - | Moderate Exposure | > 2.33 - ≤ 2.99 | 3 | - | High Exposure | > 2.99 | 4 | +| Category | Range | Category Score | +|-------------------|-----------------|----------------| +| Minimal Exposure | < 1.69 | 1 | +| Low Exposure | ≥ 1.69 - ≤ 2.33 | 2 | +| Moderate Exposure | > 2.33 - ≤ 2.99 | 3 | +| High Exposure | > 2.99 | 4 | " writelog(table3.9, logfile = logfile, verbose = verbose) writelog("\n##### Step 3:\n", logfile = logfile, verbose = verbose) From 375994fcbe1933f7ec6296fec3d71fa3c1a97368 Mon Sep 17 00:00:00 2001 From: Robert Butler Date: Fri, 7 Jun 2024 17:15:35 -0700 Subject: [PATCH 05/10] put BRI back to the old version with the logging --- R/BRI.R | 155 +++++++++++++++++++++++++++++--------------------------- 1 file changed, 79 insertions(+), 76 deletions(-) diff --git a/R/BRI.R b/R/BRI.R index a592c3d..0fb316a 100644 --- a/R/BRI.R +++ b/R/BRI.R @@ -1,84 +1,87 @@ -#' Compute the benthic response index (BRI) score and BRI condition category. -#' -#' @description -#' The BRI is the abundance-weighted pollution tolerance score of the organisms present in a benthic sample. -#' The higher the BRI score, the more degraded the benthic community represented by the sample. -#' -#' @details -#' Two types of data are needed to calculate the BRI: -#' (1) the abundance of each species and -#' (2) its pollution tolerance score, P. -#' -#' The P values are available for most species present in the assemblage. Only species for which P values are available -#' are used in the BRI calculations. P values should be obtained for the appropriate habitat and from the most -#' up-to-date list available. -#' -#' The first step in the BRI calculation is to compute the 4th root of the abundance of each taxon in the sample for -#' which P values are available. The next step is to multiply the 4th root abundance value by the P value, for each -#' taxon. -#' -#' Next, separately sum all of the 4th roots of the abundances and all of the products of the 4th roots of abundance -#' and P values. Taxa that lack P values are not included in either sum. The next step is to calculate the BRI score -#' as: -#' -#' \deqn{ \frac{\sum \left(\sqrt[p]{\textrm{Abundance}} \right) \times P}{\sum \sqrt[p]{\textrm{Abundance}}} } -#' -#' The last step is to compare the BRI score to the BRI threshold values to determine the BRI category and -#' category score. -#' -#' @param benthic_data a data frame with the following headings -#' -#' \strong{\code{station_id}} - an alpha-numeric identifier of the location; -#' \strong{\code{replicate}} - a numeric identifying the replicate number of samples taken at the location; -#' \strong{\code{sample_date}} - the date of sample collection; -#' \strong{\code{latitude}} - latitude in decimal degrees; -#' \strong{\code{longitude}} - longitude in decimal degrees. -#' Make sure there is a negative sign for the Western coordinates; -#' \strong{\code{species}} - name of the fauna, ideally in SCAMIT ed12 format. If no animals were present in the sample use -#' NoOrganismsPresent with 0 abundance; -#' -#' @usage -#' BRI(benthic_data) -#' -#' @examples -#' data(benthic_sampledata) # load sample data -#' BRI(benthic_sampledata) # see the output -#' -#' @importFrom dplyr left_join filter select mutate group_by summarize case_when -#' @importFrom tidyr pivot_longer -#' -#' @export - -BRI <- function(benthic_data) { - - out <- benthic_data %>% - left_join(sqo.list.new, by = c("taxon" = "taxon_name")) %>% - filter(!is.na(tolerance_score)) %>% - select(stratum, station_id, sample_date, replicate, taxon, abundance, tolerance_score) %>% + + +BRI <- function(BenthicData, logfile = file.path(getwd(), 'logs', format(Sys.time(), "%Y-%m-%d_%H:%M:%S"), 'log.txt' ), verbose = T) +{ + + # Initialize Logging + init.log(logfile, base.func.name = sys.call(), current.time = Sys.time(), is.base.func = length(sys.calls()) == 1, verbose = verbose) + hyphen.log.prefix <- rep('-', (2 * (length(sys.calls))) - 1) + + writelog('\nBEGIN: BRI function.\n', logfile = logfile, verbose = verbose) + + writelog('*** DATA *** Input to BRI function - BRI-step0.csv', logfile = logfile, verbose = verbose, prefix = hyphen.log.prefix) + writelog(BenthicData, logfile = file.path(dirname(logfile), 'BRI-step0.csv'), filetype = 'csv', verbose = verbose, prefix = hyphen.log.prefix) + + writelog('*** DATA *** sqo.list.new - which gets joined with BenthicData', logfile = logfile, verbose = verbose) + writelog(sqo.list.new, logfile = file.path(dirname(logfile), 'BRI-sqo.list.new.csv'), filetype = 'csv', verbose = verbose, prefix = hyphen.log.prefix) + + out <- BenthicData %>% + left_join(sqo.list.new, by = c('Taxon' = 'TaxonName')) + + writelog('*** DATA *** Benthic data joined with sqo.list.new', logfile = logfile, verbose = verbose) + writelog(out, logfile = file.path(dirname(logfile), 'BRI-step1.csv'), filetype = 'csv', verbose = verbose, prefix = hyphen.log.prefix) + + out <- out %>% + # I assume that the next line is something they had in there as a method of removing duplicates + # for this reason, this next line will likely be eliminated. + # They grouped by all the columns that were selected (In query BRI - 1) + # Instead, if need be we can use something from dplyr that deals with duplicates + # I actually found that it didn't appear to make a difference + filter(!is.na(ToleranceScore)) %>% + #rename(Stratum) %>% + select(Stratum, StationID, SampleDate, Replicate, Taxon, Abundance, ToleranceScore) + + writelog('*** DATA *** Remove NA tolerance scores and select only the columns: Stratum, StationID, SampleDate, Replicate, Taxon, Abundance, ToleranceScore', logfile = logfile, verbose = verbose, prefix = hyphen.log.prefix) + writelog(out, logfile = file.path(dirname(logfile), 'BRI-step2.csv'), filetype = 'csv', verbose = verbose, prefix = hyphen.log.prefix) + + out <- out %>% + # End of BRI - 2 query. Begin BRI - 3 query mutate( - fourthroot_abun = abundance ^ 0.25, - weighted_score = fourthroot_abun * tolerance_score + fourthroot_abun = Abundance ** 0.25, + tolerance_score = fourthroot_abun * ToleranceScore + ) + + writelog('*** DATA *** Calculate fourthroot of abundance - also multiply by tolerancs score - BRI-step3.csv', logfile = logfile, verbose = verbose, prefix = hyphen.log.prefix) + writelog(out, logfile = file.path(dirname(logfile), 'BRI-step3.csv'), filetype = 'csv', verbose = verbose, prefix = hyphen.log.prefix) + + writelog('Next get the Score - group by Stratum, StationID, SampleDate, Replicate and do: (sum of the tolerance scores)/(sum of fourthroot abundances)', logfile = logfile, verbose = verbose, prefix = hyphen.log.prefix) + writelog('Then Get Categories (CASQO Technical Manual 3rd Edition Page 72 - Table 4.24)', logfile = logfile, verbose = verbose, prefix = hyphen.log.prefix) + writelog('---- < 39.96 is Reference', logfile = logfile, verbose = verbose, prefix = hyphen.log.prefix) + writelog('---- >=39.96 and <49.15 is Low', logfile = logfile, verbose = verbose, prefix = hyphen.log.prefix) + writelog('---- >=49.15 and <73.27 is Reference', logfile = logfile, verbose = verbose, prefix = hyphen.log.prefix) + writelog('---- >=73.27 is High', logfile = logfile, verbose = verbose, prefix = hyphen.log.prefix) + + out <- out %>% + # End of BRI - 3. Begin BRI - 4 + group_by( + Stratum, StationID, SampleDate, Replicate ) %>% - group_by(stratum, station_id, sample_date, replicate) %>% summarize( - score = sum(weighted_score, na.rm = TRUE) / sum(fourthroot_abun, na.rm = TRUE), - .groups = 'drop' + Score = sum(tolerance_score, na.rm = T) / sum(fourthroot_abun, na.rm = T) ) %>% + # Output the BRI category given the BRI score and the thresholds for Southern California Marine Bays + # CASQO Technical Manual 3rd Edition Page 72 - Table 4.24 mutate( - category = case_when( - score < 39.96 ~ "Reference", - score >= 39.96 & score < 49.15 ~ "Low Disturbance", - score >= 49.15 & score < 73.27 ~ "Moderate Disturbance", - score >= 73.27 ~ "High Disturbance" - ), - category_score = case_when( - category == "Reference" ~ 1, - category == "Low Disturbance" ~ 2, - category == "Moderate Disturbance" ~ 3, - category == "High Disturbance" ~ 4 - ), - index = "BRI" - ) + Category = case_when( (Score < 39.96) ~ "Reference", + (Score >= 39.96 & Score < 49.15) ~ "Low Disturbance", + (Score >= 49.15 & Score < 73.27) ~ "Moderate Disturbance", + (Score >= 73.27) ~ "High Disturbance" + )) %>% + # Output the BRI category score given the category for thresholds for Southern CA Marine Bays + mutate( + `Category Score` = case_when( (Category == "Reference") ~ 1, + (Category == "Low Disturbance") ~ 2, + (Category == "Moderate Disturbance") ~ 3, + (Category == "High Disturbance") ~ 4 ) + ) %>% + dplyr::mutate(Index = "BRI") + + + writelog('*** DATA *** Final BRI dataframe: BRI-final.csv', logfile = logfile, verbose = verbose, prefix = hyphen.log.prefix) + writelog(out, logfile = file.path(dirname(logfile), 'BRI-final.csv'), filetype = 'csv', verbose = verbose, prefix = hyphen.log.prefix) + + + writelog('\nEND: BRI function.\n', logfile = logfile, verbose = verbose) return(out) } From ed5fe461878c87c8055a121bf4d01f17ac583f78 Mon Sep 17 00:00:00 2001 From: Robert Butler Date: Mon, 10 Jun 2024 08:24:07 -0700 Subject: [PATCH 06/10] added the roxygen comments to the BRI function - it is likely that this BRI function will be replaced by the function definition found in SQO BRI - generic.R --- R/BRI.R | 62 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 61 insertions(+), 1 deletion(-) diff --git a/R/BRI.R b/R/BRI.R index 0fb316a..25f2239 100644 --- a/R/BRI.R +++ b/R/BRI.R @@ -1,4 +1,64 @@ - +#' Compute the benthic response index (BRI) score and BRI condition category. +#' +#' @description +#' The BRI is the abundance weighted pollution tolerance score of the organisms present in a benthic sample. The higher +#' the BRI score, the more degraded the benthic community represented by the sample. +#' +#' @details +#' The BRI is the 4th root relative abundance weighted pollution tolerance score of the organisms present in a benthic sample. The higher +#' the BRI score, the more degraded the benthic community represented by the sample. +#' +#' Two types of data are needed to calculate the BRI: +#' +#' (1) the abundance of each species +#' (2) species-specific pollution tolerance score (aka, P Value) +#' +#' Tolerance Values are stored in the Southern California SQO Species List provided with this coded. Species names are periodically +#' updated by benthic experts. +#' +#' The BRI is only calculated from those taxa with a tolerance score. The first step in the BRI calculation is to compute the 4th root +#' of the abundance of each taxon in the sample that have an associated tolerance score +#' The next step is to multiply the 4th root abundance value by the tolerance score for each taxon. +#' The next step is to sum all of the 4th root abundance values in a given sample. +#' The actual BRI score is calculated as: +#' +#' \deqn{ \frac{\sum \left(\sqrt[p]{\textrm{Abundance}} \right) \times P}{\sum \sqrt[p]{\textrm{Abundance}}} } +#' +#' The last step is to convert the BRI score to condition category using the category thresholds listed in Table 5. +#' +#'
+#' +#' +#' +#' @param BenthicData a data frame with the following headings +#' +#' \strong{\code{StationID}} - an alpha-numeric identifier of the location; +#' +#' \strong{\code{Replicate}} - a numeric identifying the replicate number of samples taken at the location; +#' +#' \strong{\code{SampleDate}} - the date of sample collection; +#' +#' \strong{\code{Latitude}} - latitude in decimal degrees; +#' +#' \strong{\code{Longitude}} - longitude in decimal degrees. +#' Make sure there is a negative sign for the Western coordinates; +#' +#' \strong{\code{Species}} - name of the fauna, ideally in SCAMIT ed12 format, do not use sp. or spp., +#' use sp only or just the Genus. If no animals were present in the sample use +#' NoOrganismsPresent with 0 abundance; +#' +#' @usage +#' BRI(benthic_data) +#' +#' @examples +#' data(benthic_sampledata) # load sample data +#' BRI(benthic_sampledata) # see the output +#' +#' @import vegan +#' @import reshape2 +#' @importFrom dplyr left_join filter rename select mutate group_by summarize summarise case_when +#' +#' @export BRI <- function(BenthicData, logfile = file.path(getwd(), 'logs', format(Sys.time(), "%Y-%m-%d_%H:%M:%S"), 'log.txt' ), verbose = T) { From 6e4af85de92235bc482653fd459e1c61018e5e8c Mon Sep 17 00:00:00 2001 From: Robert Butler Date: Mon, 10 Jun 2024 08:27:18 -0700 Subject: [PATCH 07/10] adding documentation for the generic BRI function - the plan is to replace the current BRI function with this one --- NAMESPACE | 1 + R/SQO BRI - generic.R | 5 ++-- man/BRI.Rd | 26 +++++++--------- man/BRI_.Rd | 59 +++++++++++++++++++++++++++++++++++++ man/chemdata_prep.Rd | 2 +- man/create_download_link.Rd | 3 +- man/init.log.Rd | 3 +- man/writelog.Rd | 4 ++- 8 files changed, 81 insertions(+), 22 deletions(-) create mode 100644 man/BRI_.Rd diff --git a/NAMESPACE b/NAMESPACE index 43b1764..3a7abcf 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,6 +1,7 @@ # Generated by roxygen2: do not edit by hand export(BRI) +export(BRI_) export(CSI) export(IBI) export(LRM) diff --git a/R/SQO BRI - generic.R b/R/SQO BRI - generic.R index b43f43e..629d314 100644 --- a/R/SQO BRI - generic.R +++ b/R/SQO BRI - generic.R @@ -48,11 +48,11 @@ #' NoOrganismsPresent with 0 abundance; #' #' @usage -#' BRI(benthic_data) +#' BRI_(benthic_data) #' #' @examples #' data(benthic_sampledata) # load sample data -#' BRI(benthic_sampledata) # see the output +#' BRI_(benthic_sampledata) # see the output #' #' @import vegan #' @import reshape2 @@ -60,7 +60,6 @@ #' #' @export - BRI_ <- function(BenthicData) #BenthicData will need to be the species abundances for each sample in the correct format noted above and in support material { #loading in packages needed to run function diff --git a/man/BRI.Rd b/man/BRI.Rd index 778044f..ab02281 100644 --- a/man/BRI.Rd +++ b/man/BRI.Rd @@ -29,30 +29,26 @@ The BRI is the abundance weighted pollution tolerance score of the organisms pre the BRI score, the more degraded the benthic community represented by the sample. } \details{ -The BRI is the abundance weighted pollution tolerance score of the organisms present in a benthic sample. The higher +The BRI is the 4th root relative abundance weighted pollution tolerance score of the organisms present in a benthic sample. The higher the BRI score, the more degraded the benthic community represented by the sample. Two types of data are needed to calculate the BRI: - (1) the abundance of each species and - (2) its pollution tolerance score, P. - - The P values are available for most species present in the assemblage. Only species for which P values are avialable - are used in the BRI calculations. P values showld be obtained for the appropriate habitat and from the most - up-to-date list available. + (1) the abundance of each species + (2) species-specific pollution tolerance score (aka, P Value) - The first step in the BRI calculation is to compute the 4th root of the abundance of each taxon in the sample for - which P values are available. The next step is to multiply the 4th root abundance value by the P value, for each - taxon. + Tolerance Values are stored in the Southern California SQO Species List provided with this coded. Species names are periodically + updated by benthic experts. - Next, separately sum all of the 4th roots of the abundances and all of the products of the 4th roots of abundance - and P values. Taxa that lack P values are not included in either sum. The next step is to calculate the BRI score - as: + The BRI is only calculated from those taxa with a tolerance score. The first step in the BRI calculation is to compute the 4th root + of the abundance of each taxon in the sample that have an associated tolerance score + The next step is to multiply the 4th root abundance value by the tolerance score for each taxon. + The next step is to sum all of the 4th root abundance values in a given sample. + The actual BRI score is calculated as: \deqn{ \frac{\sum \left(\sqrt[p]{\textrm{Abundance}} \right) \times P}{\sum \sqrt[p]{\textrm{Abundance}}} } - The last step is to compare the BRI score to the BRI threshold values in Table 5 to determine the BRI category and - category score. + The last step is to convert the BRI score to condition category using the category thresholds listed in Table 5.
} diff --git a/man/BRI_.Rd b/man/BRI_.Rd new file mode 100644 index 0000000..2c5cfe0 --- /dev/null +++ b/man/BRI_.Rd @@ -0,0 +1,59 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/SQO BRI - generic.R +\name{BRI_} +\alias{BRI_} +\title{Compute the benthic response index (BRI) score and BRI condition category.} +\usage{ +BRI_(benthic_data) +} +\arguments{ +\item{BenthicData}{a data frame with the following headings + + \strong{\code{StationID}} - an alpha-numeric identifier of the location; + + \strong{\code{Replicate}} - a numeric identifying the replicate number of samples taken at the location; + + \strong{\code{SampleDate}} - the date of sample collection; + + \strong{\code{Latitude}} - latitude in decimal degrees; + + \strong{\code{Longitude}} - longitude in decimal degrees. + Make sure there is a negative sign for the Western coordinates; + + \strong{\code{Species}} - name of the fauna, ideally in SCAMIT ed12 format, do not use sp. or spp., + use sp only or just the Genus. If no animals were present in the sample use + NoOrganismsPresent with 0 abundance;} +} +\description{ +The BRI is the abundance weighted pollution tolerance score of the organisms present in a benthic sample. The higher + the BRI score, the more degraded the benthic community represented by the sample. +} +\details{ +The BRI is the 4th root relative abundance weighted pollution tolerance score of the organisms present in a benthic sample. The higher + the BRI score, the more degraded the benthic community represented by the sample. + + Two types of data are needed to calculate the BRI: + + (1) the abundance of each species + (2) species-specific pollution tolerance score (aka, P Value) + + Tolerance Values are stored in the Southern California SQO Species List provided with this coded. Species names are periodically + updated by benthic experts. + + The BRI is only calculated from those taxa with a tolerance score. The first step in the BRI calculation is to compute the 4th root + of the abundance of each taxon in the sample that have an associated tolerance score + The next step is to multiply the 4th root abundance value by the tolerance score for each taxon. + The next step is to sum all of the 4th root abundance values in a given sample. + The actual BRI score is calculated as: + + \deqn{ \frac{\sum \left(\sqrt[p]{\textrm{Abundance}} \right) \times P}{\sum \sqrt[p]{\textrm{Abundance}}} } + + The last step is to convert the BRI score to condition category using the category thresholds listed in Table 5. + +
+} +\examples{ +data(benthic_sampledata) # load sample data +BRI_(benthic_sampledata) # see the output + +} diff --git a/man/chemdata_prep.Rd b/man/chemdata_prep.Rd index f5ee4e1..7cbd046 100644 --- a/man/chemdata_prep.Rd +++ b/man/chemdata_prep.Rd @@ -7,7 +7,7 @@ chemdata_prep(chemdata) } \arguments{ -\item{chemdata}{a dataframe with the following columns: +\item{chemdata_prep.input}{a dataframe with the following columns: \code{StationID}, diff --git a/man/create_download_link.Rd b/man/create_download_link.Rd index d2862a3..f327da2 100644 --- a/man/create_download_link.Rd +++ b/man/create_download_link.Rd @@ -9,7 +9,8 @@ create_download_link( logfile, filename, linktext = "Download the data", - include.rownames = FALSE + include.row.names = FALSE, + verbose = TRUE ) } \arguments{ diff --git a/man/init.log.Rd b/man/init.log.Rd index 23eb887..e298b94 100644 --- a/man/init.log.Rd +++ b/man/init.log.Rd @@ -11,7 +11,8 @@ init.log( current.time = Sys.time(), is.base.func = T, verbose = T, - title = "Log" + title = "Log", + libraries = c("rmarkdown") ) } \arguments{ diff --git a/man/writelog.Rd b/man/writelog.Rd index d1ccd76..0135e2b 100644 --- a/man/writelog.Rd +++ b/man/writelog.Rd @@ -13,7 +13,9 @@ writelog( prefix = NULL, include.row.names = F, code = NULL, - data = NULL + data = NULL, + echo.code = TRUE, + pageLength = 10 ) } \arguments{ From d74dbc9f6505ca14e7a78602b45ccbc86f7c35f6 Mon Sep 17 00:00:00 2001 From: Robert Butler Date: Mon, 10 Jun 2024 08:39:16 -0700 Subject: [PATCH 08/10] importing ymd in SQO BRI Generic; Also Lowercasing column names of the input data in SQO BRI Generic --- NAMESPACE | 1 + R/SQO BRI - generic.R | 14 +++++++++++++- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/NAMESPACE b/NAMESPACE index 3a7abcf..d5b94a9 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -39,6 +39,7 @@ importFrom(dplyr,rename) importFrom(dplyr,select) importFrom(dplyr,summarise) importFrom(dplyr,summarize) +importFrom(lubridate,ymd) importFrom(plyr,rbind.fill) importFrom(purrr,map) importFrom(purrr,map_dfr) diff --git a/R/SQO BRI - generic.R b/R/SQO BRI - generic.R index 629d314..e22edee 100644 --- a/R/SQO BRI - generic.R +++ b/R/SQO BRI - generic.R @@ -57,13 +57,25 @@ #' @import vegan #' @import reshape2 #' @importFrom dplyr left_join filter rename select mutate group_by summarize summarise case_when +#' @importFrom lubridate ymd #' #' @export BRI_ <- function(BenthicData) #BenthicData will need to be the species abundances for each sample in the correct format noted above and in support material { + + + # I put these in the import statements in the roxygen comments + # I think the only function that may have came from tidyverse that this function needed was ymd from lubridate + # If others come up we can add them as needed, or if it becomes too much we can import the whole tidyverse package (@import tidyverse) + # - Robert 06.10.2024 + #loading in packages needed to run function - require(tidyverse) + # require(tidyverse) + + # It appears that this version of the function works with all lowercase column names - Robert 06.10.2024 + names(BenthicData) <- names(BenthicData) %>% tolower() + #loading in SQO species list that contains p codes, amongst other things load("data/SoCal SQO LU 4_7_20.RData") From c681c07c78aca6ebb353938d2b76b40cefffe172 Mon Sep 17 00:00:00 2001 From: Robert Butler Date: Mon, 10 Jun 2024 09:44:50 -0700 Subject: [PATCH 09/10] remove rounding from chemdata_prep --- R/chemistry.R | 75 +++++++++++++++++---------------------------------- 1 file changed, 25 insertions(+), 50 deletions(-) diff --git a/R/chemistry.R b/R/chemistry.R index b0983c0..9f86cb1 100644 --- a/R/chemistry.R +++ b/R/chemistry.R @@ -1598,23 +1598,34 @@ chemdata_prep <- function(chemdata_prep.input, logfile = file.path(getwd(), 'log verbose = verbose ) + # define columns for which to check for duplicates + dupcols <- intersect(names(chemdata_prep.input), c('stationid', 'analytename', 'sampletypecode', 'labrep', 'fieldrep')) + writelog( + "\n##### define columns for which to check for duplicates:", + code = "dupcols <- intersect(names(chemdata_prep.input), c('stationid', 'analytename', 'sampletypecode', 'labrep', 'fieldrep'))", + logfile = logfile, + verbose = verbose, + pageLength = 5 + ) + + # Isolate duplicates for display purposes - chemdupes <- chemdata_prep.input[(chemdata_prep.input %>% select(stationid, analytename, sampletypecode, labrep, fieldrep) %>% duplicated), ] + chemdupes <- chemdata_prep.input[(chemdata_prep.input %>% select(all_of(dupcols)) %>% duplicated), ] writelog( "\n##### Duplicate records:", - code = 'chemdupes <- chemdata_prep.input[(chemdata_prep.input %>% select(stationid, analytename, sampletypecode, labrep, fieldrep) %>% duplicated), ]', + code = 'chemdupes <- chemdata_prep.input[(chemdata_prep.input %>% select(all_of(dupcols)) %>% duplicated), ]', data = chemdupes, logfile = logfile, verbose = verbose, pageLength = 5 ) - writelog(chemdata_prep.input, file.path(dirname(logfile), 'chemdata-preprocessing-step3.csv'), filetype = 'csv', verbose = verbose) + # Purge duplicates from actual input data - chemdata_prep.input <- chemdata_prep.input[!(chemdata_prep.input %>% select(stationid, analytename, sampletypecode, labrep, fieldrep) %>% duplicated), ] + chemdata_prep.input <- chemdata_prep.input[!(chemdata_prep.input %>% select(all_of(dupcols)) %>% duplicated), ] writelog( "\n##### Chemdata Prep Input Data with Duplicates Removed:", - code = 'chemdata_prep.input <- chemdata_prep.input[!(chemdata_prep.input %>% select(stationid, analytename, sampletypecode, labrep, fieldrep) %>% duplicated), ]', + code = 'chemdata_prep.input <- chemdata_prep.input[!(chemdata_prep.input %>% select(all_of(dupcols)) %>% duplicated), ]', data = chemdata_prep.input %>% head(15), logfile = logfile, verbose = verbose, @@ -2176,21 +2187,10 @@ chemdata_prep <- function(chemdata_prep.input, logfile = file.path(getwd(), 'log writelog("\n___\n___\n___\n___ " , logfile = logfile, verbose = verbose) - # ---- Step 14: Fix Rounding ---- - # Conversation with Darrin on April 16th 2020 - # We need to fix the rounding in this thing. - # The fixes may also need to be implemented in the chemdata_prep function - # Certain analytes result values need to be rounded to different numbers of decimal places - # going off memory here. Darrin is the one to consult, and he can show the latest greatest version of the excel tool - # Copper - 1 - # Lead - 1 - # Mercury - 2 - # Zinc - 1 - # HPAH - 1 - # LPAH - 1 - # All the rest - 2 - writelog("\n#### Step 14: Combine all analytes and fix rounding", logfile = logfile, verbose = verbose) - writelog("\n##### Copper, Lead, Zinc, High/Low PAHs will be rounded to 1 decimal place; All others will be rounded to 2", logfile = logfile, verbose = verbose) + + + # ---- Step 14: Combine data ---- + writelog("\n#### Step 14: Combine all analytes", logfile = logfile, verbose = verbose) # Code # Step 13 dealt only with DDTs - essentially ddts_total is the step 13 dataframe @@ -2199,7 +2199,7 @@ chemdata_prep <- function(chemdata_prep.input, logfile = file.path(getwd(), 'log writelog("\n##### Combine data with DDTs", logfile = logfile, verbose = verbose) # Actually perform the code - chemdata.step14a <- chemdata.step12 %>% + chemdata.preprocessed.final <- chemdata.step12 %>% bind_rows(ddts_total) %>% arrange(stationid, compound) %>% rename( @@ -2210,9 +2210,9 @@ chemdata_prep <- function(chemdata_prep.input, logfile = file.path(getwd(), 'log # Write code to the R Markdown file writelog( - "", + "Final Chemdata Prep Output", code = ' - chemdata.step14a <- chemdata.step12 %>% + chemdata.preprocessed.final <- chemdata.step12 %>% bind_rows(ddts_total) %>% arrange(stationid, compound) %>% rename( @@ -2221,40 +2221,15 @@ chemdata_prep <- function(chemdata_prep.input, logfile = file.path(getwd(), 'log Result = result ) ', - data = chemdata.step14a %>% head(15), - logfile = logfile, - verbose = verbose - ) - - - # Actually perform the rounding - chemdata.preprocessed.final <- chemdata.step14a %>% - mutate( - Result = case_when( - AnalyteName %in% c('Copper','Lead','Zinc','HPAH','LPAH') ~ round(Result, digits = 1), - TRUE ~ round(Result, digits = 2) - ) - ) - - # Write it to the R Markdown logs - writelog( - "Final Chemdata Prep output:", - code = ' - chemdata.preprocessed.final <- chemdata.step14a %>% - mutate( - Result = case_when( - AnalyteName %in% c(\'Copper\',\'Lead\',\'Zinc\',\'HPAH\',\'LPAH\') ~ round(Result, digits = 1), - TRUE ~ round(Result, digits = 2) - ) - ) - ', data = chemdata.preprocessed.final %>% head(15), logfile = logfile, verbose = verbose ) + writelog("\n#### END Function: chemdata_prep\n", logfile = logfile, verbose = verbose) + return(chemdata.preprocessed.final) } From 148ef71441ed2b73321bd0a393da829d5c6e5161 Mon Sep 17 00:00:00 2001 From: Robert Butler Date: Mon, 10 Jun 2024 12:59:03 -0700 Subject: [PATCH 10/10] in CSI calculation, round to 4 decimal places if the result value is below 1 --- R/chemistry.R | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/R/chemistry.R b/R/chemistry.R index 9f86cb1..81a5408 100644 --- a/R/chemistry.R +++ b/R/chemistry.R @@ -601,7 +601,8 @@ CSI <- function(chemdata.csi.input, preprocessed = F, logfile = file.path(getwd( chemdata_csi <- chemdata_csi %>% mutate( Result = case_when( - Result <= 0.005 ~ round(Result, 4), + # June 10, 2024 - It was decided we will round to 4 decimal places if the result value is less than 1 + Result <= 1 ~ round(Result, 4), Result < 10 ~ round(Result, 2), Result < 100 ~ round(Result, 1), TRUE ~ round(Result) @@ -615,7 +616,8 @@ CSI <- function(chemdata.csi.input, preprocessed = F, logfile = file.path(getwd( chemdata_csi <- chemdata_csi %>% mutate( Result = case_when( - Result <= 0.005 ~ round(Result, 4), + # June 10, 2024 - It was decided we will round to 4 decimal places if the result value is less than 1 + Result <= 1 ~ round(Result, 4), Result < 10 ~ round(Result, 2), Result < 100 ~ round(Result, 1), TRUE ~ round(Result)