From 2da04b29f339495f7f401d0d14b928a68395e959 Mon Sep 17 00:00:00 2001
From: meetagrawal09 <agrawalmeet91@gmail.com>
Date: Mon, 27 May 2024 18:35:33 +0530
Subject: [PATCH 001/155] added test dummies

---
 .../tests/testthat/test.jagify.R              | 33 +++++++++++++++
 .../tests/testthat/test.meta.analysis.R       |  3 ++
 .../tests/testthat/test.run.meta.analysis.R   | 41 +++----------------
 .../tests/testthat/test.single.MA.R           |  5 +++
 4 files changed, 46 insertions(+), 36 deletions(-)
 create mode 100644 modules/meta.analysis/tests/testthat/test.jagify.R
 create mode 100644 modules/meta.analysis/tests/testthat/test.meta.analysis.R
 create mode 100644 modules/meta.analysis/tests/testthat/test.single.MA.R

diff --git a/modules/meta.analysis/tests/testthat/test.jagify.R b/modules/meta.analysis/tests/testthat/test.jagify.R
new file mode 100644
index 00000000000..7d934c10113
--- /dev/null
+++ b/modules/meta.analysis/tests/testthat/test.jagify.R
@@ -0,0 +1,33 @@
+#-------------------------------------------------------------------------------
+# Copyright (c) 2012 University of Illinois, NCSA.
+# All rights reserved. This program and the accompanying materials
+# are made available under the terms of the 
+# University of Illinois/NCSA Open Source License
+# which accompanies this distribution, and is available at
+# http://opensource.ncsa.illinois.edu/license.html
+#-------------------------------------------------------------------------------
+
+test_that("jagify correctly assigns treatment index of 1 to all control treatments, regardless of alphabetical order", {
+  ## generate test data; controls assigned to early alphabet and late alphabet trt names
+  testresult <- data.frame(citation_id = 1,
+                           site_id = rep(1:2, each = 5),
+                           name = rep(letters[1:5],2),
+                           trt_id = as.character(rep(letters[1:5],2)),
+                           control = c(1, rep(0,8), 1), 
+                           greenhouse = c(rep(0,5), rep(1,5)),
+                           date = 1,
+                           time = NA,
+                           cultivar_id = 1,
+                           specie_id = 1,
+                           n = 2,
+                           mean = sqrt(1:10),
+                           stat = 1,
+                           statname = "SE",
+                           treatment_id = 1:10
+  )
+  i <- sapply(testresult, is.factor)
+  testresult[i] <- lapply(testresult[i], as.character)
+
+  jagged.data <- jagify(testresult)
+  expect_equal(jagged.data$trt_num[jagged.data$trt == "control"], c(1, 1)) 
+})
diff --git a/modules/meta.analysis/tests/testthat/test.meta.analysis.R b/modules/meta.analysis/tests/testthat/test.meta.analysis.R
new file mode 100644
index 00000000000..87841bea8ca
--- /dev/null
+++ b/modules/meta.analysis/tests/testthat/test.meta.analysis.R
@@ -0,0 +1,3 @@
+test_that("`pecan.ma`", {
+  
+})
\ No newline at end of file
diff --git a/modules/meta.analysis/tests/testthat/test.run.meta.analysis.R b/modules/meta.analysis/tests/testthat/test.run.meta.analysis.R
index f777fdbd7e8..23837003cd5 100644
--- a/modules/meta.analysis/tests/testthat/test.run.meta.analysis.R
+++ b/modules/meta.analysis/tests/testthat/test.run.meta.analysis.R
@@ -1,42 +1,11 @@
-#-------------------------------------------------------------------------------
-# Copyright (c) 2012 University of Illinois, NCSA.
-# All rights reserved. This program and the accompanying materials
-# are made available under the terms of the 
-# University of Illinois/NCSA Open Source License
-# which accompanies this distribution, and is available at
-# http://opensource.ncsa.illinois.edu/license.html
-#-------------------------------------------------------------------------------
+test_that("`runModule.run.meta.analysis`", {
 
-context("run.meta.analysis")
+})
 
+test_that("`run.meta.analysis`", {
 
-test_that("singleMA gives expected result for example inputs",{
-  ## need to calculate x
-  ## x <- singleMA(....)
-  #expect_equal(round(summary(x)$statistics["beta.o", "Mean"]), 5)
 })
 
-test_that("jagify correctly assigns treatment index of 1 to all control treatments, regardless of alphabetical order", {
-  ## generate test data; controls assigned to early alphabet and late alphabet trt names
-  testresult <- data.frame(citation_id = 1,
-                           site_id = rep(1:2, each = 5),
-                           name = rep(letters[1:5],2),
-                           trt_id = as.character(rep(letters[1:5],2)),
-                           control = c(1, rep(0,8), 1), 
-                           greenhouse = c(rep(0,5), rep(1,5)),
-                           date = 1,
-                           time = NA,
-                           cultivar_id = 1,
-                           specie_id = 1,
-                           n = 2,
-                           mean = sqrt(1:10),
-                           stat = 1,
-                           statname = "SE",
-                           treatment_id = 1:10
-  )
-  i <- sapply(testresult, is.factor)
-  testresult[i] <- lapply(testresult[i], as.character)
+test_that("`run.meta.analysis.pft`", {
 
-  jagged.data <- jagify(testresult)
-  expect_equal(jagged.data$trt_num[jagged.data$trt == "control"], c(1, 1)) 
-})
+})
\ No newline at end of file
diff --git a/modules/meta.analysis/tests/testthat/test.single.MA.R b/modules/meta.analysis/tests/testthat/test.single.MA.R
new file mode 100644
index 00000000000..c24c9c0d2aa
--- /dev/null
+++ b/modules/meta.analysis/tests/testthat/test.single.MA.R
@@ -0,0 +1,5 @@
+test_that("`single.MA` gives expected result for example inputs", {
+  ## need to calculate x
+  ## x <- singleMA(....)
+  #expect_equal(round(summary(x)$statistics["beta.o", "Mean"]), 5)
+})

From 84739bf37d8f9138a5e1e1f06c310a24195412bc Mon Sep 17 00:00:00 2001
From: Meet Agrawal <meet.m.agrawal@oracle.com>
Date: Sun, 9 Jun 2024 17:32:26 +0530
Subject: [PATCH 002/155] fixed test

---
 modules/meta.analysis/tests/testthat/test.jagify.R | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/modules/meta.analysis/tests/testthat/test.jagify.R b/modules/meta.analysis/tests/testthat/test.jagify.R
index 7d934c10113..7541783f18b 100644
--- a/modules/meta.analysis/tests/testthat/test.jagify.R
+++ b/modules/meta.analysis/tests/testthat/test.jagify.R
@@ -25,9 +25,7 @@ test_that("jagify correctly assigns treatment index of 1 to all control treatmen
                            statname = "SE",
                            treatment_id = 1:10
   )
-  i <- sapply(testresult, is.factor)
-  testresult[i] <- lapply(testresult[i], as.character)
 
   jagged.data <- jagify(testresult)
-  expect_equal(jagged.data$trt_num[jagged.data$trt == "control"], c(1, 1)) 
+  expect_equal(jagged.data$trt_num[jagged.data$trt == "control"], c(1, 1))
 })

From dccd805863f3b8545b793c3c639e5e0af4309ae4 Mon Sep 17 00:00:00 2001
From: sambhavnoobcoder <94298612+sambhavnoobcoder@users.noreply.github.com>
Date: Thu, 27 Jun 2024 15:29:27 +0530
Subject: [PATCH 003/155] Add Preprocess Function for Data Cleaning and
 Validation

Description:

This PR introduces a new preprocess function designed to streamline the data cleaning and validation process. This function reads input data and site coordinates, validates the presence of the specified date and carbon pool, and ensures the consistency of data dimensions. It outputs a structured list containing the cleaned data, ready for further analysis. Below is an extended description of the new function and its components.

Function: preprocess

Purpose:
The preprocess function is created to read and validate input data and site coordinates, ensuring that the data is correctly formatted and consistent for further processing. It handles potential inconsistencies in the data, providing informative messages and adjustments where necessary.

Parameters:
data_path: Path to the RDS file containing the input data.
coords_path: Path to the CSV file containing site coordinates.
date: The specific date for which the carbon data is to be extracted.
C_pool: The specific carbon pool within the input data to focus on.
Process:
Reading Data:

Reads the input data from the provided RDS file.
Reads the site coordinates from the provided CSV file.
Validation:

Checks if the specified date exists in the input data. If not, the function stops and returns an error message.
Extracts the carbon data for the specified date and validates the existence of the specified carbon pool. If the carbon pool is not found, the function stops and returns an error message.
Data Transformation:

Transposes the extracted carbon data to a data frame format, ensuring each column represents an ensemble.
Renames the columns to a consistent naming convention (e.g., "ensemble1", "ensemble2", etc.).
Coordinate Validation:

Ensures that the site coordinates data contains 'lon' and 'lat' columns. If these columns are missing, the function stops and returns an error message.
Data Consistency:

Validates that the number of rows in the site coordinates matches the number of rows in the carbon data.
If there is a mismatch in the number of rows, the function truncates either the site coordinates or the carbon data to match the row counts, ensuring consistency.
Output:
The function returns a list containing:

input_data: The original input data read from the RDS file.
site_coordinates: The validated and possibly truncated site coordinates.
carbon_data: The validated and possibly truncated carbon data.
Messages:
The function provides informative messages during the preprocessing steps, alerting the user to any adjustments made to the data to ensure consistency.

Example Usage:

```preprocessed_data <- preprocess("path/to/input_data.rds", "path/to/site_coords.csv", "2022-01-01", "TotalCarbon")```
Benefits:
Efficiency: Streamlines the data preparation process, reducing manual validation and transformation steps.
Error Handling: Provides clear error messages and handles common data issues, improving robustness.
Consistency: Ensures consistent data formats and dimensions, facilitating further analysis and modeling.
---
 .../assim.sequential/R/downscale_function.R   | 43 +++++++++++++++++++
 1 file changed, 43 insertions(+)

diff --git a/modules/assim.sequential/R/downscale_function.R b/modules/assim.sequential/R/downscale_function.R
index 389fe849776..b82f76d930b 100644
--- a/modules/assim.sequential/R/downscale_function.R
+++ b/modules/assim.sequential/R/downscale_function.R
@@ -13,6 +13,49 @@
 ##'
 ##' @return It returns the `downscale_output` list containing lists for the training and testing data sets, models, and predicted maps for each ensemble member.
 
+# Preprocess function to check and clean the data
+preprocess <- function(data_path, coords_path, date, C_pool) {
+  # Read the input data and site coordinates
+  input_data <- readRDS(data_path)
+  site_coordinates <- read_csv(coords_path)
+  
+  # Ensure the date exists in the input data
+  if (!date %in% names(input_data)) {
+    stop(paste("Date", date, "not found in the input data."))
+  }
+  
+  # Extract the carbon data for the specified date
+  index <- which(names(input_data) == date)
+  data <- input_data[[index]]
+  
+  # Ensure the carbon pool exists in the input data
+  if (!C_pool %in% names(data)) {
+    stop(paste("Carbon pool", C_pool, "not found in the input data."))
+  }
+  # Keep every column named C_pool, transpose, and label the resulting columns by ensemble number
+  carbon_data <- as.data.frame(t(data[which(names(data) == C_pool)]))
+  names(carbon_data) <- paste0("ensemble", seq_len(ncol(carbon_data)))
+  
+  # Ensure site coordinates have 'lon' and 'lat' columns
+  if (!all(c("lon", "lat") %in% names(site_coordinates))) {
+    stop("Site coordinates must contain 'lon' and 'lat' columns.")
+  }
+  
+  # Ensure the number of rows in site coordinates matches the number of rows in carbon data
+  if (nrow(site_coordinates) != nrow(carbon_data)) {
+    message("Number of rows in site coordinates does not match the number of rows in carbon data.")
+    if (nrow(site_coordinates) > nrow(carbon_data)) {
+      message("Truncating site coordinates to match carbon data rows.")
+      site_coordinates <- site_coordinates[seq_len(nrow(carbon_data)), ]
+    } else {
+      message("Truncating carbon data to match site coordinates rows.")
+      # drop = FALSE keeps a data frame even when there is only one ensemble column
+      carbon_data <- carbon_data[seq_len(nrow(site_coordinates)), , drop = FALSE]
+    }
+  }
+  message("Preprocessing completed successfully.")
+  return(list(input_data = input_data, site_coordinates = site_coordinates, carbon_data = carbon_data))
+}
 
 NA_downscale <- function(data, coords, date, C_pool, covariates){
   

From eaa08465ef7231e12f43cc38a60dd40e69ba0630 Mon Sep 17 00:00:00 2001
From: sambhavnoobcoder <94298612+sambhavnoobcoder@users.noreply.github.com>
Date: Sat, 29 Jun 2024 14:41:31 +0530
Subject: [PATCH 004/155] Rename Function to NA_preprocess and Add Roxygen
 Documentation

Description

Renaming to NA_preprocess:

The function has been renamed from preprocess to NA_preprocess to better reflect its specific role in the North America downscaling process. This change ensures that the function name is consistent with the existing NA_downscale function, promoting a more organized and intuitive codebase.
Addition of Roxygen Documentation:

Comprehensive Roxygen documentation has been added to the NA_preprocess function. This documentation includes the following sections:

Title and Name:

Provides a clear title and the function's name for easy identification.
Description:

Offers a brief overview of the function's purpose and functionality.
Parameters:

Details each parameter, including data_path, coords_path, date, and C_pool, describing their expected input types and roles within the function.
Details:

Explains the specific tasks performed by the function, such as reading input data, validating the date and carbon pool, and ensuring the consistency of site coordinates.
Return Value:

Describes the structure and contents of the list returned by the function, which includes the input data, cleaned site coordinates, and extracted carbon data.
---
 .../assim.sequential/R/downscale_function.R   | 38 +++++++++++++------
 1 file changed, 26 insertions(+), 12 deletions(-)

diff --git a/modules/assim.sequential/R/downscale_function.R b/modules/assim.sequential/R/downscale_function.R
index b82f76d930b..d625f4bb66f 100644
--- a/modules/assim.sequential/R/downscale_function.R
+++ b/modules/assim.sequential/R/downscale_function.R
@@ -1,20 +1,18 @@
-##' @title North America Downscale Function
-##' @name NA_downscale
-##' @author Joshua Ploshay
+##' @title Preprocess Data for Downscaling
+##' @name NA_preprocess
+##' @description This function reads and checks the input data, ensuring that the required date and carbon pool exist, and that the site coordinates are valid.
 ##'
-##' @param data  In quotes, file path for .rds containing ensemble data.
-##' @param coords In quotes, file path for .csv file containing the site coordinates, columns named "lon" and "lat".
-##' @param date In quotes, if SDA site run, format is yyyy/mm/dd, if NEON, yyyy-mm-dd. Restricted to years within file supplied to 'data'.
-##' @param C_pool In quotes, carbon pool of interest. Name must match carbon pool name found within file supplied to 'data'.
-##' @param covariates SpatRaster stack, used as predictors in randomForest. Layers within stack should be named. Recommended that this stack be generated using 'covariates' instructions in assim.sequential/inst folder
-##' @details This function will downscale forecast data to unmodeled locations using covariates and site locations
+##' @param data_path Character. File path for .rds containing ensemble data.
+##' @param coords_path Character. File path for .csv file containing the site coordinates, with columns named "lon" and "lat".
+##' @param date Character. If SDA site run, format is yyyy/mm/dd; if NEON, yyyy-mm-dd. Restricted to years within the file supplied to 'data_path'.
+##' @param C_pool Character. Carbon pool of interest. Name must match the carbon pool name found within the file supplied to 'data_path'.
 ##'
-##' @description This function uses the randomForest model.
+##' @details This function ensures that the specified date and carbon pool are present in the input data. It also checks the validity of the site coordinates and aligns the number of rows between site coordinates and carbon data.
 ##'
-##' @return It returns the `downscale_output` list containing lists for the training and testing data sets, models, and predicted maps for each ensemble member.
+##' @return A list containing The read .rds data , The cleaned site coordinates ,The extracted and possibly truncated carbon data.
 
 # Preprocess function to check and clean the data
-preprocess <- function(data_path, coords_path, date, C_pool) {
+NA_preprocess <- function(data_path, coords_path, date, C_pool) {
   # Read the input data and site coordinates
   input_data <- readRDS(data_path)
   site_coordinates <- read_csv(coords_path)
@@ -57,6 +55,22 @@ preprocess <- function(data_path, coords_path, date, C_pool) {
   return(list(input_data = input_data, site_coordinates = site_coordinates, carbon_data = carbon_data))
 }
 
+##' @title North America Downscale Function
+##' @name NA_downscale
+##' @author Joshua Ploshay
+##'
+##' @param data  In quotes, file path for .rds containing ensemble data.
+##' @param coords In quotes, file path for .csv file containing the site coordinates, columns named "lon" and "lat".
+##' @param date In quotes, if SDA site run, format is yyyy/mm/dd, if NEON, yyyy-mm-dd. Restricted to years within file supplied to 'data'.
+##' @param C_pool In quotes, carbon pool of interest. Name must match carbon pool name found within file supplied to 'data'.
+##' @param covariates SpatRaster stack, used as predictors in randomForest. Layers within stack should be named. Recommended that this stack be generated using 'covariates' instructions in assim.sequential/inst folder
+##' @details This function will downscale forecast data to unmodeled locations using covariates and site locations
+##'
+##' @description This function uses the randomForest model.
+##'
+##' @return It returns the `downscale_output` list containing lists for the training and testing data sets, models, and predicted maps for each ensemble member.
+
+
 NA_downscale <- function(data, coords, date, C_pool, covariates){
   
   # Read the input data and site coordinates

From cbc0a34df655d5dd8ee495f175397c0323f02e82 Mon Sep 17 00:00:00 2001
From: sambhavnoobcoder <94298612+sambhavnoobcoder@users.noreply.github.com>
Date: Sat, 29 Jun 2024 15:28:46 +0530
Subject: [PATCH 005/155] added author name and fixed roxygen formatting
 slightly

added the name of the author for the NA_preprocess function and changed the structure of the roxygen for the NA_preprocess function slightly so as to keep it consistent with the structure throughout the repository and existing code
---
 modules/assim.sequential/R/downscale_function.R | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/modules/assim.sequential/R/downscale_function.R b/modules/assim.sequential/R/downscale_function.R
index d625f4bb66f..f8c10575ee8 100644
--- a/modules/assim.sequential/R/downscale_function.R
+++ b/modules/assim.sequential/R/downscale_function.R
@@ -1,14 +1,15 @@
 ##' @title Preprocess Data for Downscaling
 ##' @name NA_preprocess
-##' @description This function reads and checks the input data, ensuring that the required date and carbon pool exist, and that the site coordinates are valid.
+##' @author Sambhav Dixit
 ##'
 ##' @param data_path Character. File path for .rds containing ensemble data.
 ##' @param coords_path Character. File path for .csv file containing the site coordinates, with columns named "lon" and "lat".
 ##' @param date Character. If SDA site run, format is yyyy/mm/dd; if NEON, yyyy-mm-dd. Restricted to years within the file supplied to 'data_path'.
 ##' @param C_pool Character. Carbon pool of interest. Name must match the carbon pool name found within the file supplied to 'data_path'.
-##'
 ##' @details This function ensures that the specified date and carbon pool are present in the input data. It also checks the validity of the site coordinates and aligns the number of rows between site coordinates and carbon data.
 ##'
+##' @description This function reads and checks the input data, ensuring that the required date and carbon pool exist, and that the site coordinates are valid.
+##'
 ##' @return A list containing The read .rds data , The cleaned site coordinates ,The extracted and possibly truncated carbon data.
 
 # Preprocess function to check and clean the data

From 50cca2cea06cfa1c7ddc7a1956accad0ccf243bb Mon Sep 17 00:00:00 2001
From: Harunobu Ishii <harunobuishii547@gmail.com>
Date: Tue, 2 Jul 2024 16:46:00 -0400
Subject: [PATCH 006/155] v1 hourly downscale

---
 .../R/downscale_function_hrly.R               | 96 +++++++++++++++++++
 1 file changed, 96 insertions(+)
 create mode 100644 modules/assim.sequential/R/downscale_function_hrly.R

diff --git a/modules/assim.sequential/R/downscale_function_hrly.R b/modules/assim.sequential/R/downscale_function_hrly.R
new file mode 100644
index 00000000000..ff340e16b3c
--- /dev/null
+++ b/modules/assim.sequential/R/downscale_function_hrly.R
@@ -0,0 +1,96 @@
+##' @title North America Hourly Downscale Function
+##' @name NA_downscale_hrly
+##' @author Harunobu Ishii
+##'
+##' @param nc_data An open ncdf4 object (e.g. as returned by `ncdf4::nc_open()`) holding the ensemble data. The variables "NEE" and "weights_rrel" and the "time" dimension are read from it.
+##' @param coords In quotes, file path for .csv file containing the site coordinates, columns named "lon" and "lat".
+##' @param date In quotes, format is yyyy-mm-dd hh:mm:ss EST. Restricted to time steps within the data supplied to 'nc_data' (hours since 1986-01-01T00:00).
+##' @param covariates SpatRaster stack, used as predictors in randomForest. Layers within stack should be named. Recommended that this stack be generated using 'covariates' instructions in assim.sequential/inst folder
+##' @details This function will downscale forecast data (hourly) to unmodeled locations using covariates and site locations. It stops with an error if 'date' does not match any time step in 'nc_data'.
+##'
+##' @description This function uses the randomForest model.
+##'
+##' @return It returns the `downscale_output` list with the elements `data` (training and testing data sets), `models` and `maps`, each holding one entry per ensemble member, plus `weights_rrel`, the relative ensemble weights for the requested time step.
+
+
+NA_downscale_hrly <- function(nc_data, coords, date, covariates){
+  
+  # Read the ensemble NEE values and their relative weights. nc_data has to be an
+  # already opened ncdf4 object, because its time dimension is read from it below.
+  input_data <- ncdf4::ncvar_get(nc_data, "NEE")
+  weights_rrel <- ncdf4::ncvar_get(nc_data, "weights_rrel")
+  
+  # Timereadable: turn the time axis into POSIXct timestamps.
+  # NOTE(review): characters 12-31 of the units string are taken as the origin,
+  # presumably from "hours since <origin>"; no tz is passed, so the origin is
+  # parsed in the session time zone -- confirm this matches the 'date' argument.
+  time <- nc_data$dim$time$vals
+  time_units <- nc_data$dim$time$units
+  time_origin <- as.POSIXct(substr(time_units, 12, 31), format="%Y-%m-%dT%H:%M")
+  time_readable <- time_origin + time * 3600  # Convert hours to seconds
+  
+  # Extract predictors from covariates raster using site coordinates
+  site_coordinates <- terra::vect(readr::read_csv(coords), geom=c("lon", "lat"), crs="EPSG:4326")
+  predictors <- as.data.frame(terra::extract(covariates, site_coordinates,ID = FALSE))
+  
+  # Locate the requested time step; stop with a clear message rather than
+  # carrying an empty index into the array subsets below.
+  index <- which(time_readable == date)
+  if (length(index) == 0) {
+    stop(paste("Date", date, "not found in the input data."))
+  }
+  
+  # NOTE(review): the subsets below assume NEE is stored as [time, site, ensemble]
+  # and weights_rrel as [site, ensemble, time] -- confirm against the file layout.
+  carbon_data <- as.data.frame(input_data[index, , ])
+  curr_weights_rrel <- weights_rrel[, , index]
+  names(carbon_data) <- paste0("ensemble", seq_len(ncol(carbon_data)))
+  colnames(curr_weights_rrel) <- paste0("ensemble", seq_len(ncol(curr_weights_rrel)))
+  
+  # Combine each ensemble member with all predictors, then split the rows into
+  # training (3/4) and testing (1/4) data frames. The response column is named
+  # "carbon_data" when it is bound, so no separate rename step is needed.
+  # sample() draws the rows at random; call set.seed() first for a reproducible split.
+  ensembles <- lapply(carbon_data, function(member) {
+    df <- cbind(carbon_data = member, predictors)
+    train_rows <- sample(seq_len(nrow(df)), size = round(0.75*nrow(df)))
+    training <- df[train_rows, ]
+    testing <- df[-train_rows, ]
+    list(training = training, testing = testing)
+  })
+  
+  # Train a random forest model for each ensemble member using the training data.
+  # The predictor names in the formula are fixed, so the covariates must provide
+  # layers with exactly these names.
+  rf_output <- vector("list", length(ensembles))
+  for (i in seq_along(ensembles)) {
+    # ensembles[[i]][[1]] is the training data frame of ensemble member i
+    rf_output[[i]] <- randomForest::randomForest(ensembles[[i]][[1]][["carbon_data"]] ~ land_cover+tavg+prec+srad+vapr+nitrogen+phh2o+soc+sand,
+                                                 data = ensembles[[i]][[1]],
+                                                 ntree = 1000,
+                                                 na.action = stats::na.omit,
+                                                 keep.forest = TRUE,
+                                                 importance = TRUE)
+  }
+  
+  # Generate predictions (maps) for each ensemble member using the trained models
+  maps <- vector("list", length(rf_output))
+  for (i in seq_along(rf_output)) {
+    maps[[i]] <- terra::predict(object = covariates,
+                                model = rf_output[[i]],na.rm = TRUE)
+  }
+  
+  # Label the per-ensemble results with appropriate ensemble numbers
+  ensemble_names <- paste0("ensemble", seq_along(ensembles))
+  names(ensembles) <- ensemble_names
+  names(rf_output) <- ensemble_names
+  names(maps) <- ensemble_names
+  
+  # Organize the results into a single named output list
+  downscale_output <- list(data = ensembles,
+                           models = rf_output,
+                           maps = maps,
+                           weights_rrel = curr_weights_rrel)
+  
+  return(downscale_output)
+}

From 1a0594c929536a7eb958d85f057b82b9258cc06f Mon Sep 17 00:00:00 2001
From: meetagrawal09 <agrawalmeet91@gmail.com>
Date: Sat, 6 Jul 2024 18:29:36 +0530
Subject: [PATCH 007/155] added tests : runModule.run.meta.analysis,
 run.meta.analysis

---
 .../tests/testthat/test.run.meta.analysis.R   | 19 ++++++++++++++-----
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/modules/meta.analysis/tests/testthat/test.run.meta.analysis.R b/modules/meta.analysis/tests/testthat/test.run.meta.analysis.R
index 23837003cd5..46d02343efb 100644
--- a/modules/meta.analysis/tests/testthat/test.run.meta.analysis.R
+++ b/modules/meta.analysis/tests/testthat/test.run.meta.analysis.R
@@ -1,11 +1,20 @@
-test_that("`runModule.run.meta.analysis`", {
-
+test_that("`runModule.run.meta.analysis` throws an error for incorrect input", {
+  expect_error(runModule.run.meta.analysis('test'), "only works with Settings or MultiSettings")
 })
 
-test_that("`run.meta.analysis`", {
-
+test_that("`run.meta.analysis` able to call run.meta.analysis.pft for each pft in the input list", {
+  mocked_res <- mockery::mock(1, cycle = TRUE)
+  mockery::stub(run.meta.analysis, 'run.meta.analysis.pft', mocked_res)
+  mockery::stub(run.meta.analysis, 'PEcAn.DB::db.open', 1)
+  mockery::stub(run.meta.analysis, 'PEcAn.DB::db.close', 1)
+  pfts <- list('ebifarm.salix', 'temperate.coniferous')
+  run.meta.analysis(pfts = pfts, iterations = 1, dbfiles = NULL, database = NULL)
+  mockery::expect_called(mocked_res, 2)
+  args <- mockery::mock_args(mocked_res)
+  expect_equal(args[[1]][[1]], "ebifarm.salix")
+  expect_equal(args[[2]][[1]], "temperate.coniferous")
 })
 
 test_that("`run.meta.analysis.pft`", {
-
+  
 })
\ No newline at end of file

From 3c0d9b7b8249cfff9f1a1963d3c31dee23cf4c37 Mon Sep 17 00:00:00 2001
From: meetagrawal09 <agrawalmeet91@gmail.com>
Date: Sun, 7 Jul 2024 11:12:21 +0530
Subject: [PATCH 008/155] added mockery to desc

---
 modules/meta.analysis/DESCRIPTION | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/modules/meta.analysis/DESCRIPTION b/modules/meta.analysis/DESCRIPTION
index 1cd66a9fe56..28cbe7681d7 100644
--- a/modules/meta.analysis/DESCRIPTION
+++ b/modules/meta.analysis/DESCRIPTION
@@ -28,16 +28,17 @@ Description: The Predictive Ecosystem Carbon Analyzer (PEcAn) is a scientific
 Imports:
     coda (>= 0.18),
     lattice,
-    PEcAn.utils,
+    MASS,
     PEcAn.DB,
     PEcAn.logger,
-    MASS,
     PEcAn.settings,
+    PEcAn.utils,
     rjags
 Suggests:
     ggmcmc,
     ggplot2,
     knitr (>= 1.42),
+    mockery,
     rmarkdown (>= 2.19),
     testthat (>= 1.0.2)
 SystemRequirements: JAGS

From f7e9846aa11c53bc462bfad00d20670ba0cb02ec Mon Sep 17 00:00:00 2001
From: meetagrawal09 <agrawalmeet91@gmail.com>
Date: Sun, 7 Jul 2024 11:16:42 +0530
Subject: [PATCH 009/155] bug fix

---
 docker/depends/pecan_package_dependencies.csv | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docker/depends/pecan_package_dependencies.csv b/docker/depends/pecan_package_dependencies.csv
index 72846487865..b11ef517001 100644
--- a/docker/depends/pecan_package_dependencies.csv
+++ b/docker/depends/pecan_package_dependencies.csv
@@ -196,6 +196,7 @@
 "mockery","*","base/visualization","Suggests",FALSE
 "mockery","*","base/workflow","Suggests",FALSE
 "mockery","*","modules/data.atmosphere","Suggests",FALSE
+"mockery","*","modules/meta.analysis","Suggests",FALSE
 "mockery",">= 0.3.0","models/biocro","Suggests",FALSE
 "mockery",">= 0.4.3","base/db","Suggests",FALSE
 "MODISTools",">= 1.1.0","modules/data.remote","Imports",FALSE

From e79be90cf2967c5cdcaecce1e6d1c0745d78bce5 Mon Sep 17 00:00:00 2001
From: meetagrawal09 <agrawalmeet91@gmail.com>
Date: Sun, 7 Jul 2024 16:32:56 +0530
Subject: [PATCH 010/155] added final tests, fixed some typos

---
 .../03_topical_pages/94_docker/04_models.Rmd    |  2 +-
 modules/meta.analysis/R/meta.analysis.R         |  2 +-
 modules/meta.analysis/R/run.meta.analysis.R     |  2 +-
 modules/meta.analysis/man/pecan.ma.Rd           |  2 +-
 modules/meta.analysis/man/run.meta.analysis.Rd  |  2 +-
 .../tests/testthat/test.meta.analysis.R         |  3 ---
 .../tests/testthat/test.run.meta.analysis.R     | 17 +++++++++++++++--
 .../tests/testthat/test.single.MA.R             |  5 -----
 8 files changed, 20 insertions(+), 15 deletions(-)
 delete mode 100644 modules/meta.analysis/tests/testthat/test.meta.analysis.R
 delete mode 100644 modules/meta.analysis/tests/testthat/test.single.MA.R

diff --git a/book_source/03_topical_pages/94_docker/04_models.Rmd b/book_source/03_topical_pages/94_docker/04_models.Rmd
index 205098b4593..8054671922e 100644
--- a/book_source/03_topical_pages/94_docker/04_models.Rmd
+++ b/book_source/03_topical_pages/94_docker/04_models.Rmd
@@ -55,7 +55,7 @@ It is important values for `type` and `version` are set correct. The PEcAn code
 
 To build the docker image, we use a Dockerfile (see example below) and run the following command. This command will expect the Dockerfile to live in the model specific folder and the command is executed in the root pecan folder. It will copy the content of the pecan folder and make it available to the build process (in this example we do not need any additional files). 
 
-Since we can have multiple different versions of a model be available for PEcAn we ar using the following naming schema `pecan/model-<modeltype>-<version>:<pecan version`. For example the image below will be named pecan/model-ed2-git, since we do not specify the exact version it will be atomically be named `pecan/model-ed2-git:latest`.
+Since we can have multiple different versions of a model available for PEcAn, we are using the following naming schema `pecan/model-<modeltype>-<version>:<pecan version>`. For example, the image below will be named pecan/model-ed2-git; since we do not specify the exact version, it will automatically be named `pecan/model-ed2-git:latest`.
 
 ````bash
 docker build \
diff --git a/modules/meta.analysis/R/meta.analysis.R b/modules/meta.analysis/R/meta.analysis.R
index 290ef2391d7..e150b1f5687 100644
--- a/modules/meta.analysis/R/meta.analysis.R
+++ b/modules/meta.analysis/R/meta.analysis.R
@@ -9,7 +9,7 @@
 
 ##' Trait Meta-analysis
 ##'
-##' Runs heirarchical meta-analysis of plant trait data
+##' Runs hierarchical meta-analysis of plant trait data
 ##'
 ##' `pecan.ma` runs a hierarchical Bayesian meta-analytical model.
 ##' This model combines prior information with data from studies on the particular species or group of interest.
diff --git a/modules/meta.analysis/R/run.meta.analysis.R b/modules/meta.analysis/R/run.meta.analysis.R
index ec208a48a3b..65afcdf61bd 100644
--- a/modules/meta.analysis/R/run.meta.analysis.R
+++ b/modules/meta.analysis/R/run.meta.analysis.R
@@ -166,7 +166,7 @@ run.meta.analysis.pft <- function(pft, iterations, random = TRUE, threshold = 1.
 ##--------------------------------------------------------------------------------------------------##
 ##' Run meta analysis
 ##'
-##' This will use the following items from setings:
+##' This will use the following items from settings:
 ##' - settings$pfts
 ##' - settings$database$bety
 ##' - settings$database$dbfiles
diff --git a/modules/meta.analysis/man/pecan.ma.Rd b/modules/meta.analysis/man/pecan.ma.Rd
index 0ed7a41f4ff..3029bd7d4e5 100644
--- a/modules/meta.analysis/man/pecan.ma.Rd
+++ b/modules/meta.analysis/man/pecan.ma.Rd
@@ -47,7 +47,7 @@ as well as Y, SE, and n for each observation or summary statistic.}
 four chains with 5000 total samples from posterior
 }
 \description{
-Runs heirarchical meta-analysis of plant trait data
+Runs hierarchical meta-analysis of plant trait data
 }
 \details{
 \code{pecan.ma} runs a hierarchical Bayesian meta-analytical model.
diff --git a/modules/meta.analysis/man/run.meta.analysis.Rd b/modules/meta.analysis/man/run.meta.analysis.Rd
index 23650d59896..e0e7e5d4fad 100644
--- a/modules/meta.analysis/man/run.meta.analysis.Rd
+++ b/modules/meta.analysis/man/run.meta.analysis.Rd
@@ -40,7 +40,7 @@ nothing, as side effect saves \code{trait.mcmc} created by
 and post.distns.Rdata, respectively
 }
 \description{
-This will use the following items from setings:
+This will use the following items from settings:
 \itemize{
 \item settings$pfts
 \item settings$database$bety
diff --git a/modules/meta.analysis/tests/testthat/test.meta.analysis.R b/modules/meta.analysis/tests/testthat/test.meta.analysis.R
deleted file mode 100644
index 87841bea8ca..00000000000
--- a/modules/meta.analysis/tests/testthat/test.meta.analysis.R
+++ /dev/null
@@ -1,3 +0,0 @@
-test_that("`pecan.ma`", {
-  
-})
\ No newline at end of file
diff --git a/modules/meta.analysis/tests/testthat/test.run.meta.analysis.R b/modules/meta.analysis/tests/testthat/test.run.meta.analysis.R
index 46d02343efb..1f9c9c9a3fd 100644
--- a/modules/meta.analysis/tests/testthat/test.run.meta.analysis.R
+++ b/modules/meta.analysis/tests/testthat/test.run.meta.analysis.R
@@ -15,6 +15,19 @@ test_that("`run.meta.analysis` able to call run.meta.analysis.pft for each pft i
   expect_equal(args[[2]][[1]], "temperate.coniferous")
 })
 
-test_that("`run.meta.analysis.pft`", {
-  
+test_that("`run.meta.analysis.pft` throws an error if it cannot find output from get.trait", {
+  pft <- list(outdir = "", name = "ebifarm.salix")
+  expect_error(
+    run.meta.analysis.pft(pft = pft, iterations = 1, dbfiles = NULL, dbcon = NULL),
+    "Could not find output from get.trait"
+  )
+})
+
+test_that("`run.meta.analysis.pft` throws an error for missing posteriorid", {
+  pft <- list(outdir = "test", name = "ebifarm.salix")
+  mockery::stub(run.meta.analysis.pft, 'file.exists', TRUE)
+  expect_error(
+    run.meta.analysis.pft(pft = pft, iterations = 1, dbfiles = NULL, dbcon = NULL, update = TRUE),
+    "Missing posteriorid"
+  )
 })
\ No newline at end of file
diff --git a/modules/meta.analysis/tests/testthat/test.single.MA.R b/modules/meta.analysis/tests/testthat/test.single.MA.R
deleted file mode 100644
index c24c9c0d2aa..00000000000
--- a/modules/meta.analysis/tests/testthat/test.single.MA.R
+++ /dev/null
@@ -1,5 +0,0 @@
-test_that("`single.MA` gives expected result for example inputs", {
-  ## need to calculate x
-  ## x <- singleMA(....)
-  #expect_equal(round(summary(x)$statistics["beta.o", "Mean"]), 5)
-})

From 2879e6a61f2010ec9222350038fd3521303f4e04 Mon Sep 17 00:00:00 2001
From: sambhavnoobcoder <94298612+sambhavnoobcoder@users.noreply.github.com>
Date: Thu, 11 Jul 2024 19:06:03 +0530
Subject: [PATCH 011/155] updated code to work with CNN in place of random
 forest model

The existing code used a random forest model as the predictive mechanism for the NA_downscale function. It is now replaced by a basic CNN in the downscale function. Further commits will aim to optimise the performance of this model, while also adding measurement and visualisation of its performance.
---
 .../assim.sequential/R/downscale_function.R   | 140 ++++++++++++------
 1 file changed, 98 insertions(+), 42 deletions(-)

diff --git a/modules/assim.sequential/R/downscale_function.R b/modules/assim.sequential/R/downscale_function.R
index f8c10575ee8..8dc3ba04718 100644
--- a/modules/assim.sequential/R/downscale_function.R
+++ b/modules/assim.sequential/R/downscale_function.R
@@ -58,34 +58,33 @@ NA_preprocess <- function(data_path, coords_path, date, C_pool) {
 
 ##' @title North America Downscale Function
 ##' @name NA_downscale
-##' @author Joshua Ploshay
+##' @author Joshua Ploshay , Sambhav Dixit
 ##'
-##' @param data  In quotes, file path for .rds containing ensemble data.
-##' @param coords In quotes, file path for .csv file containing the site coordinates, columns named "lon" and "lat".
+##' @param preprocessed , In quotes, prepocessed data returned as an output for passing the raw data to the NA_preprocess function.
 ##' @param date In quotes, if SDA site run, format is yyyy/mm/dd, if NEON, yyyy-mm-dd. Restricted to years within file supplied to 'data'.
 ##' @param C_pool In quotes, carbon pool of interest. Name must match carbon pool name found within file supplied to 'data'.
-##' @param covariates SpatRaster stack, used as predictors in randomForest. Layers within stack should be named. Recommended that this stack be generated using 'covariates' instructions in assim.sequential/inst folder
+##' @param covariates_path SpatRaster stack, used as predictors in randomForest. Layers within stack should be named. Recommended that this stack be generated using 'covariates' instructions in assim.sequential/inst folder
 ##' @details This function will downscale forecast data to unmodeled locations using covariates and site locations
 ##'
-##' @description This function uses the randomForest model.
+##' @description This function uses the Convolutional Neural Network(CNN) model.
 ##'
 ##' @return It returns the `downscale_output` list containing lists for the training and testing data sets, models, and predicted maps for each ensemble member.
 
 
-NA_downscale <- function(data, coords, date, C_pool, covariates){
+NA_downscale <- function(preprocessed, date, C_pool, covariates_path) {
   
-  # Read the input data and site coordinates
-  input_data <- readRDS(data)
-  site_coordinates <- terra::vect(readr::read_csv(coords), geom=c("lon", "lat"), crs="EPSG:4326")
+  input_data <- preprocessed$input_data
+  site_coordinates <- preprocessed$site_coordinates
+  carbon_data <- preprocessed$carbon_data
   
-  # Extract the carbon data for the specified focus year
-  index <- which(names(input_data) == date)
-  data <- input_data[[index]]
-  carbon_data <- as.data.frame(t(data[which(names(data) == C_pool)]))
-  names(carbon_data) <- paste0("ensemble",seq(1:ncol(carbon_data)))
+  # Convert site coordinates to SpatVector
+  site_coordinates <- terra::vect(site_coordinates, geom = c("lon", "lat"), crs = "EPSG:4326")
+  
+  # Load the covariates raster stack
+  covariates <- rast(covariates_path)
   
   # Extract predictors from covariates raster using site coordinates
-  predictors <- as.data.frame(terra::extract(covariates, site_coordinates,ID = FALSE)) 
+  predictors <- as.data.frame(terra::extract(covariates, site_coordinates, ID = FALSE))
   
   # Combine each ensemble member with all predictors
   ensembles <- list()
@@ -95,52 +94,109 @@ NA_downscale <- function(data, coords, date, C_pool, covariates){
   
   # Rename the carbon_data column for each ensemble member
   for (i in 1:length(ensembles)) {
-    ensembles[[i]] <- dplyr::rename(ensembles[[i]], "carbon_data" = "carbon_data[[i]]")
+    colnames(ensembles[[i]])[1] <- "carbon_data"
   }
   
   # Split the observations in each data frame into two data frames based on the proportion of 3/4
   ensembles <- lapply(ensembles, function(df) {
-    sample <- sample(1:nrow(df), size = round(0.75*nrow(df)))
-    train  <- df[sample, ]
-    test   <- df[-sample, ]
-    split_list <- list(train, test)
+    sample <- sample(1:nrow(df), size = round(0.75 * nrow(df)))
+    train <- df[sample, ]
+    test <- df[-sample, ]
+    split_list <- list(training = train, testing = test)
     return(split_list)
   })
   
-  # Rename the training and testing data frames for each ensemble member
+  # Train a CNN model for each ensemble member using the training data
+  cnn_output <- list()
   for (i in 1:length(ensembles)) {
-    # names(ensembles) <- paste0("ensemble",seq(1:length(ensembles)))
-    names(ensembles[[i]]) <- c("training", "testing")
+    # Prepare data for CNN
+    x_train <- as.matrix(ensembles[[i]]$training[, c("tavg", "prec", "srad", "vapr")])
+    y_train <- as.matrix(ensembles[[i]]$training$carbon_data)
+    x_test <- as.matrix(ensembles[[i]]$testing[, c("tavg", "prec", "srad", "vapr")])
+    y_test <- as.matrix(ensembles[[i]]$testing$carbon_data)
+    
+    # Normalize the data
+    x_train <- scale(x_train)
+    x_test <- scale(x_test)
+    
+    # Reshape data for CNN input (samples, timesteps, features)
+    x_train <- array_reshape(x_train, c(nrow(x_train), 1, ncol(x_train)))
+    x_test <- array_reshape(x_test, c(nrow(x_test), 1, ncol(x_test)))
+    
+    # Define the CNN model
+    model <- keras_model_sequential() %>%
+      layer_conv_1d(filters = 64, kernel_size = 1, activation = 'relu', input_shape = c(1, 4)) %>%
+      layer_flatten() %>%
+      layer_dense(units = 64, activation = 'relu') %>%
+      layer_dense(units = 1)
+    
+    # Compile the model
+    model %>% compile(
+      loss = 'mean_squared_error',
+      optimizer = optimizer_adam(),
+      metrics = c('mean_absolute_error')
+    )
+    
+    # Train the model
+    model %>% fit(
+      x = x_train,
+      y = y_train,
+      epochs = 100,
+      batch_size = 32,
+      validation_split = 0.2,
+      verbose = 0
+    )
+    
+    cnn_output[[i]] <- model
   }
   
-  # Train a random forest model for each ensemble member using the training data
-  rf_output <- list()
-  for (i in 1:length(ensembles)) {
-    rf_output[[i]] <- randomForest::randomForest(ensembles[[i]][[1]][["carbon_data"]] ~ land_cover+tavg+prec+srad+vapr+nitrogen+phh2o+soc+sand,
-                                                 data = ensembles[[i]][[1]],
-                                                 ntree = 1000,
-                                                 na.action = stats::na.omit,
-                                                 keep.forest = T,
-                                                 importance = T)
+  # Wrapper function to apply the trained model
+  predict_with_model <- function(model, data) {
+    data <- as.matrix(data[, c("tavg", "prec", "srad", "vapr")])
+    data <- scale(data)
+    data <- array_reshape(data, c(nrow(data), 1, ncol(data)))
+    predictions <- predict(model, data)
+    return(predictions)
   }
   
   # Generate predictions (maps) for each ensemble member using the trained models
-  maps <- list(ncol(rf_output))
-  for (i in 1:length(rf_output)) {
-    maps[[i]] <- terra::predict(object = covariates,
-                                model = rf_output[[i]],na.rm = T)
+  maps <- list()
+  predictions <- list()
+  for (i in 1:length(cnn_output)) {
+    # Prepare data for prediction
+    x_pred <- as.matrix(predictors[, c("tavg", "prec", "srad", "vapr")])
+    x_pred <- scale(x_pred)
+    x_pred <- array_reshape(x_pred, c(nrow(x_pred), 1, ncol(x_pred)))
+    
+    map_pred <- predict_with_model(cnn_output[[i]], as.data.frame(covariates[]))
+    map_pred <- rast(matrix(map_pred, nrow = nrow(covariates), ncol = ncol(covariates)), ext = ext(covariates), crs = crs(covariates))
+    maps[[i]] <- map_pred
+    
+    # Generate predictions for testing data
+    predictions[[i]] <- predict_with_model(cnn_output[[i]], ensembles[[i]]$testing)
+  }
+  
+  # Calculate performance metrics for each ensemble member
+  metrics <- list()
+  for (i in 1:length(predictions)) {
+    actual <- ensembles[[i]]$testing$carbon_data
+    predicted <- predictions[[i]]
+    mse <- mean((actual - predicted)^2)
+    mae <- mean(abs(actual - predicted))
+    r_squared <- 1 - sum((actual - predicted)^2) / sum((actual - mean(actual))^2)
+    metrics[[i]] <- list(MSE = mse, MAE = mae, R_squared = r_squared, actual = actual, predicted = predicted)
   }
   
   # Organize the results into a single output list
-  downscale_output <- list(ensembles, rf_output, maps)
+  downscale_output <- list(data = ensembles, models = cnn_output, maps = maps, metrics = metrics)
   
   # Rename each element of the output list with appropriate ensemble numbers
-  for (i in 1:length(downscale_output)) {
-    names(downscale_output[[i]]) <- paste0("ensemble",seq(1:length(downscale_output[[i]])))
+  for (i in 1:length(downscale_output$data)) {
+    names(downscale_output$data)[i] <- paste0("ensemble", i)
+    names(downscale_output$models)[i] <- paste0("ensemble", i)
+    names(downscale_output$maps)[i] <- paste0("ensemble", i)
+    names(downscale_output$metrics)[i] <- paste0("ensemble", i)
   }
   
-  # Rename the main components of the output list
-  names(downscale_output) <- c("data", "models", "maps")
-  
   return(downscale_output)
 }

From 8de027724ee6a1ccbf48a2e68218fe0d464fa88d Mon Sep 17 00:00:00 2001
From: sambhavnoobcoder <94298612+sambhavnoobcoder@users.noreply.github.com>
Date: Thu, 11 Jul 2024 19:59:50 +0530
Subject: [PATCH 012/155] runner code for the NA_preprocess and NA_Downscale
 function.

This commit initialises the file paths and parameters needed by both functions, then passes them first to the NA_preprocess function and passes its output to the NA_downscale function. Finally, it prints the results.
---
 .../assim.sequential/R/downscale_function.R   | 21 +++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/modules/assim.sequential/R/downscale_function.R b/modules/assim.sequential/R/downscale_function.R
index 8dc3ba04718..dd68afe8b5c 100644
--- a/modules/assim.sequential/R/downscale_function.R
+++ b/modules/assim.sequential/R/downscale_function.R
@@ -200,3 +200,24 @@ NA_downscale <- function(preprocessed, date, C_pool, covariates_path) {
   
   return(downscale_output)
 }
+
+# Define file paths for the data
+data_path <- " " # Replace with the actual path to your data file
+coords_path <- " "  # Replace with the actual path to your coordinates file
+covariates_path <- " " # Replace with the actual path to your .tiff file
+
+# Define parameters
+date <- " " # Replace with the actual date you want to use
+C_pool <- " " # Replace with the actual carbon pool name you want to use
+
+# Preprocess the data
+preprocessed_data <- preprocess(data_path, coords_path, date, C_pool)
+
+# Run the NA_downscale function
+result <- NA_downscale(preprocessed_data, date, C_pool, covariates_path)
+
+# Print the result
+print(result)
+
+
+

From e3403e6ddfe8545054ca7ed1d045c568c6b1a6aa Mon Sep 17 00:00:00 2001
From: sambhavnoobcoder <94298612+sambhavnoobcoder@users.noreply.github.com>
Date: Thu, 11 Jul 2024 20:02:58 +0530
Subject: [PATCH 013/155] Printing Evaluation metrics for the model

This commit adds code that prints out the accuracy metrics for each ensemble in the data passed to the model for prediction.
---
 modules/assim.sequential/R/downscale_function.R | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/modules/assim.sequential/R/downscale_function.R b/modules/assim.sequential/R/downscale_function.R
index dd68afe8b5c..36f05c2e8cc 100644
--- a/modules/assim.sequential/R/downscale_function.R
+++ b/modules/assim.sequential/R/downscale_function.R
@@ -219,5 +219,11 @@ result <- NA_downscale(preprocessed_data, date, C_pool, covariates_path)
 # Print the result
 print(result)
 
+# Print the accuracy metrics
+print("Accuracy Metrics for Each Ensemble:")
+for (i in seq_along(result$metrics)) {
+  cat(paste0("Ensemble ", i, ":"))
+  print(result$metrics[[i]])
+}
 
 

From be698d540d373b59b1135eb9fef7bd7594c02121 Mon Sep 17 00:00:00 2001
From: sambhavnoobcoder <94298612+sambhavnoobcoder@users.noreply.github.com>
Date: Thu, 11 Jul 2024 20:26:44 +0530
Subject: [PATCH 014/155] Prepare metrics data for multi-axis line plot
 visualization

This commit prepares the metrics data from our ensemble model results for visualization in a line plot with multiple y-axes. The main changes are:

1. Data Transformation:
   - Convert the list of metrics for each ensemble into a single dataframe
   - Each row represents one ensemble's metrics
   - Columns include: Ensemble identifier, MSE, MAE, and R-squared

2. Data Reshaping:
   - Melt the dataframe to long format for easier plotting with ggplot2
   - This creates a tidy dataset with columns: Ensemble, variable, value

3. Key Steps:
   - Use lapply() with do.call(rbind, ...) to efficiently combine metrics
   - Create ensemble identifiers (ensemble1, ensemble2, etc.)
   - Use reshape2::melt() for data reshaping

4. Preparation for Visualization:
   - The resulting 'metrics_melted' dataframe is now ready for use with ggplot2
   - This format allows for easy creation of a multi-line plot with separate y-axes for different metrics
---
 modules/assim.sequential/R/downscale_function.R | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/modules/assim.sequential/R/downscale_function.R b/modules/assim.sequential/R/downscale_function.R
index 36f05c2e8cc..e94f2ab8900 100644
--- a/modules/assim.sequential/R/downscale_function.R
+++ b/modules/assim.sequential/R/downscale_function.R
@@ -227,3 +227,13 @@ for (i in seq_along(result$metrics)) {
 }
 
 
+# Prepare metrics data for line plot with multiple y-axes
+metrics_df <- do.call(rbind, lapply(seq_along(result$metrics), function(i) {
+  data.frame(Ensemble = paste0("ensemble", i),
+             MSE = result$metrics[[i]]$MSE,
+             MAE = result$metrics[[i]]$MAE,
+             R_squared = result$metrics[[i]]$R_squared)
+}))
+
+# Reshape data for ggplot
+metrics_melted <- reshape2::melt(metrics_df, id.vars = "Ensemble")

From 35f0a6e1d1e040d701fe0cc2a490e552aa4b64f0 Mon Sep 17 00:00:00 2001
From: sambhavnoobcoder <94298612+sambhavnoobcoder@users.noreply.github.com>
Date: Thu, 11 Jul 2024 20:33:49 +0530
Subject: [PATCH 015/155] Create multi-metric line plot for ensemble
 performance visualization

This commit implements a line plot using ggplot2 to visualize the performance metrics (MSE and MAE) across different ensembles. Key aspects of this visualisation include:

1. Data Selection:
   - Filters the melted metrics data to include only MSE and MAE
   - Excludes R-squared for this particular plot (likely due to scale differences)

2. Plot Structure:
   - Uses ggplot2 to create a line plot with points
   - X-axis represents different ensembles
   - Y-axis shows the values for MSE and MAE
   - Colors differentiate between MSE and MAE lines

3. Aesthetic Choices:
   - Implements both lines and points for clear trend visualization and specific value identification
   - Colors are automatically assigned to differentiate metrics
   - X-axis labels are rotated 45 degrees for better readability, especially with many ensembles

4. Labeling:
   - Title clearly states the purpose of the plot
   - X-axis labeled as "Ensemble"
   - Y-axis labeled as "MSE and MAE" to indicate the metrics shown
   - Legend title set to "Metrics" for clarity

5. Scale:
   - Uses a continuous scale for the y-axis, appropriate for MSE and MAE values
   - Single y-axis used for both metrics, assuming their scales are comparable
---
 modules/assim.sequential/R/downscale_function.R | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/modules/assim.sequential/R/downscale_function.R b/modules/assim.sequential/R/downscale_function.R
index e94f2ab8900..33ed9be890d 100644
--- a/modules/assim.sequential/R/downscale_function.R
+++ b/modules/assim.sequential/R/downscale_function.R
@@ -237,3 +237,13 @@ metrics_df <- do.call(rbind, lapply(seq_along(result$metrics), function(i) {
 
 # Reshape data for ggplot
 metrics_melted <- reshape2::melt(metrics_df, id.vars = "Ensemble")
+
+# Create a line plot with multiple y-axes using ggplot2
+p1 <- ggplot(metrics_melted[metrics_melted$variable %in% c("MSE", "MAE"), ], aes(x = Ensemble, y = value, color = variable, group = variable)) +
+  geom_line() +
+  geom_point() +
+  scale_y_continuous(name = "MSE and MAE") +
+  labs(title = "Performance Metrics for Each Ensemble",
+       x = "Ensemble",
+       color = "Metrics") +
+  theme(axis.text.x = element_text(angle = 45, hjust = 1))

From 48b7c50b5ef08c05ece0e6e472ca2dcb32478fc5 Mon Sep 17 00:00:00 2001
From: sambhavnoobcoder <94298612+sambhavnoobcoder@users.noreply.github.com>
Date: Thu, 11 Jul 2024 20:37:16 +0530
Subject: [PATCH 016/155] Add R-squared plot and combine with MSE/MAE plot

This commit extends our visualisation by creating a separate plot for R-squared values and combining it with the previously created MSE/MAE plot. Key aspects of this update include:

1. R-squared Plot Creation:
   - Filters the melted metrics data to include only R-squared values
   - Creates a new ggplot object (p2) specifically for R-squared
   - Maintains consistent styling with the MSE/MAE plot for visual coherence

2. Plot Structure:
   - Uses the same x-axis (Ensemble) as the MSE/MAE plot
   - Y-axis now represents R-squared values
   - Implements both line and point geometries for clear trend visualization

3. Aesthetic Consistency:
   - Maintains 45-degree rotation for x-axis labels
   - Uses consistent color scheme (though only one metric is present)
   - Keeps the same theme as the MSE/MAE plot

4. Labeling:
   - X-axis labeled as "Ensemble" (consistent with p1)
   - Y-axis explicitly labeled as "R_squared"
   - Legend title set to "Metrics" for consistency

5. Plot Combination:
   - Uses grid.arrange() from the gridExtra package to combine p1 and p2
   - Arranges plots vertically (ncol = 1) for easy metric comparison across ensembles

6. Advantages of This Approach:
   - Separates R-squared into its own plot, addressing potential scale differences with MSE/MAE
   - Allows for easy visual comparison of all three metrics (MSE, MAE, R-squared) across ensembles
   - Maintains readability by not overcrowding a single plot with three potentially differently-scaled metrics
---
 modules/assim.sequential/R/downscale_function.R | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/modules/assim.sequential/R/downscale_function.R b/modules/assim.sequential/R/downscale_function.R
index 33ed9be890d..3949d520121 100644
--- a/modules/assim.sequential/R/downscale_function.R
+++ b/modules/assim.sequential/R/downscale_function.R
@@ -247,3 +247,14 @@ p1 <- ggplot(metrics_melted[metrics_melted$variable %in% c("MSE", "MAE"), ], aes
        x = "Ensemble",
        color = "Metrics") +
   theme(axis.text.x = element_text(angle = 45, hjust = 1))
+
+p2 <- ggplot(metrics_melted[metrics_melted$variable == "R_squared", ], aes(x = Ensemble, y = value, color = variable, group = variable)) +
+  geom_line() +
+  geom_point() +
+  scale_y_continuous(name = "R_squared") +
+  labs(x = "Ensemble",
+       color = "Metrics") +
+  theme(axis.text.x = element_text(angle = 45, hjust = 1))
+
+# Combine the plots
+grid.arrange(p1, p2, ncol = 1)

From ebb32fbd4640be8f0206680645a94531cda825e6 Mon Sep 17 00:00:00 2001
From: sambhavnoobcoder <94298612+sambhavnoobcoder@users.noreply.github.com>
Date: Thu, 11 Jul 2024 20:45:04 +0530
Subject: [PATCH 017/155] Add scatter plot comparing actual vs predicted values
 for ensemble models

This commit introduces a scatter plot to visualize the performance of different ensemble models by comparing actual and predicted values for randomly sampled instances. Key aspects of this update include:

1. Data Preparation:
   - Uses set.seed(123) for reproducibility of random sampling
   - Samples one random instance from each ensemble's predictions
   - Creates a dataframe (sampled_data) with columns for Ensemble, Actual, and Predicted values

2. Scatter Plot Creation:
   - Utilizes ggplot2 to create a comprehensive scatter plot (p3)
   - X-axis represents Actual values, Y-axis represents Predicted values
   - Each ensemble is represented by a unique color

3. Plot Elements:
   - Points:
     * Circular points for actual values
     * Square points for predicted values
   - Lines:
     * Dotted vertical lines connecting actual to predicted values
     * Dashed diagonal line (y=x) representing perfect prediction
     * Solid blue line showing the overall regression trend
   - Shapes are manually defined for clear differentiation between actual and predicted values

4. Aesthetics and Labeling:
   - Title clearly describes the plot's purpose
   - Axes labeled as "Actual" and "Predicted"
   - Legend includes both Ensemble (color) and Type (shape)
   - Uses a minimal theme for clean presentation
   - X-axis labels rotated 45 degrees for better readability

5. Statistical Elements:
   - Includes a regression line (geom_smooth) to show overall trend
   - Omits confidence interval (se = FALSE) for clarity

6. Visualization Insights:
   - Allows for quick assessment of each ensemble's prediction accuracy
   - Facilitates easy comparison of prediction errors across ensembles
   - The y=x line helps in identifying over- or under-predictions

7. Usage:
   - The plot is immediately displayed using print(p3)

8. Potential Future Enhancements:
   - Revise color scheme to reflect data types rather than ensembles
   - Implement a binary color system: e.g., blue for actual data, red for predicted data
   - This change aligns with the conceptual role of ensembles as data groupings rather than distinct entities
   - Binary colouring would emphasise the comparison between actual and predicted values across all ensembles
---
 .../assim.sequential/R/downscale_function.R   | 34 +++++++++++++++++++
 1 file changed, 34 insertions(+)

diff --git a/modules/assim.sequential/R/downscale_function.R b/modules/assim.sequential/R/downscale_function.R
index 3949d520121..cb5c44e80e5 100644
--- a/modules/assim.sequential/R/downscale_function.R
+++ b/modules/assim.sequential/R/downscale_function.R
@@ -258,3 +258,37 @@ p2 <- ggplot(metrics_melted[metrics_melted$variable == "R_squared", ], aes(x = E
 
 # Combine the plots
 grid.arrange(p1, p2, ncol = 1)
+
+# Scatter plot to compare actual and predicted values for each ensemble with one random instance
+set.seed(123)  # For reproducibility
+sampled_data <- do.call(rbind, lapply(seq_along(result$metrics), function(i) {
+  ensemble_name <- names(result$metrics)[i]
+  actual <- result$metrics[[i]]$actual
+  predicted <- result$metrics[[i]]$predicted
+  
+  # Sample one random instance
+  sample_index <- sample(1:length(actual), 1)
+  actual_sample <- actual[sample_index]
+  predicted_sample <- predicted[sample_index]
+  
+  data.frame(Ensemble = ensemble_name, Actual = actual_sample, Predicted = predicted_sample)
+}))
+
+# Create scatter plot with lines connecting actual and predicted values
+p3 <- ggplot(sampled_data, aes(x = Actual, y = Predicted, color = Ensemble)) +
+  geom_point(aes(x = Actual, y = Actual, shape = "Actual"), size = 3) + # Circle for actual values
+  geom_point(aes(x = Predicted, y = Predicted, shape = "Predicted"), size = 3) + # Square for predicted values
+  geom_segment(aes(x = Actual, y = Actual, xend = Actual, yend = Predicted), linetype = "dotted") + # Connect actual to predicted
+  geom_abline(slope = 1, intercept = 0, linetype = "dashed", color = "black") +
+  geom_smooth(method = "lm", linetype = "solid", se = FALSE, color = "blue") + # Regression line
+  labs(title = "Actual vs. Predicted Scatter Plot for Random Samples",
+       x = "Actual",
+       y = "Predicted",
+       color = "Ensemble",
+       shape = "Type") +
+  scale_shape_manual(values = c("Actual" = 16, "Predicted" = 22)) + # Define shapes
+  theme_minimal() +
+  theme(axis.text.x = element_text(angle = 45, hjust = 1))
+
+# Display scatter plot
+print(p3)

From 8cc689fdb2430220f61d2377fd3bb735ca613124 Mon Sep 17 00:00:00 2001
From: sambhavnoobcoder <94298612+sambhavnoobcoder@users.noreply.github.com>
Date: Thu, 11 Jul 2024 21:01:47 +0530
Subject: [PATCH 018/155] Implement Taylor Diagram for ensemble model
 evaluation

This commit introduces a Taylor Diagram to provide a concise visual summary of how well ensemble models match observations. Key aspects of this update include:

1. Taylor Diagram Function:
   - Creates a custom function 'TaylorDiagram' using ggplot2
   - Visualizes three statistical parameters: standard deviation, correlation coefficient, and centered root-mean-square difference
   - Uses a color-coded system to differentiate between ensemble models

2. Data Preparation:
   - Iterates through each ensemble's metrics
   - Calculates required statistics: observed and modelled standard deviations, and correlation
   - Normalizes standard deviations for consistent scaling across ensembles
   - Compiles data into a single dataframe (taylor_data)

3. Plot Structure:
   - X-axis represents normalized modelled standard deviation
   - Y-axis represents normalized observed standard deviation
   - Color distinguishes different ensemble models
   - Uses fixed coordinate system for proper circular representation

4. Aesthetic Choices:
   - Employs rainbow color palette for easy distinction between ensembles
   - Uses minimal theme for clean presentation
   - Places legend on the right for clear model identification

5. Labeling:
   - Title clearly states "Taylor Diagram for Ensemble Members"
   - Axes labeled as "Standard Deviation (normalised)"
   - Color legend labeled as "Model"

6. Visualization Insights:
   - Allows simultaneous comparison of multiple statistical parameters
   - Facilitates easy identification of models closest to observations
   - Provides a compact way to evaluate model performance across ensembles

7. Usage:
   - The plot is created and immediately displayed using print(taylor_plot)

8. Potential Future Enhancements:
   - Revise the color scheme of the Taylor Diagram to better reflect the nature of ensembles
   - Implement a single color for all data points, recognizing that ensembles are essentially data bins rather than distinct entities
   - This change eliminates the implication of metadata that doesn't exist, as the current multi-color scheme suggests differences between ensembles that may not be meaningful
   - Consider using shape or size variations instead of color to distinguish points if needed
   - Add alternative methods to convey ensemble-specific information, such as labels or interactive tooltips
   - This modification will simplify the visual presentation and focus attention on the statistical relationships rather than arbitrary ensemble distinctions
---
 .../assim.sequential/R/downscale_function.R   | 48 +++++++++++++++++++
 1 file changed, 48 insertions(+)

diff --git a/modules/assim.sequential/R/downscale_function.R b/modules/assim.sequential/R/downscale_function.R
index cb5c44e80e5..1011aaacf2a 100644
--- a/modules/assim.sequential/R/downscale_function.R
+++ b/modules/assim.sequential/R/downscale_function.R
@@ -226,6 +226,54 @@ for (i in seq_along(result$metrics)) {
   print(result$metrics[[i]])
 }
 
+# Function to create a Taylor diagram
+TaylorDiagram <- function(taylor_data, ...) {
+  # Convert data to the format required by the plot
+  taylor_data$color <- as.factor(taylor_data$Ensemble)
+  
+  plot <- ggplot(taylor_data, aes(x = Modelled, y = Observed, color = color)) +
+    geom_point(size = 3) +
+    scale_color_manual(values = rainbow(length(unique(taylor_data$color)))) +
+    coord_fixed() +
+    theme_minimal() +
+    labs(title = "Taylor Diagram for Ensemble Members",
+         x = "Standard Deviation (normalised)",
+         y = "Standard Deviation (normalised)",
+         color = "Model") +
+    theme(legend.position = "right")
+  
+  return(plot)
+}
+
+# Prepare data for Taylor diagram
+taylor_data <- data.frame()
+
+for (i in seq_along(result$metrics)) {
+  ensemble_name <- names(result$metrics)[i]
+  actual <- result$metrics[[i]]$actual
+  predicted <- result$metrics[[i]]$predicted
+  
+  # Calculate required statistics
+  obs_sd <- sd(actual)
+  mod_sd <- sd(predicted)
+  correlation <- cor(actual, predicted)
+  
+  # Normalize standard deviations
+  norm_obs_sd <- obs_sd / max(obs_sd, mod_sd)
+  norm_mod_sd <- mod_sd / max(obs_sd, mod_sd)
+  
+  # Add to the data frame
+  taylor_data <- rbind(taylor_data, data.frame(
+    Ensemble = ensemble_name,
+    Observed = norm_obs_sd,
+    Modelled = norm_mod_sd,
+    r = correlation
+  ))
+}
+
+# Create and display Taylor diagram
+taylor_plot <- TaylorDiagram(taylor_data)
+print(taylor_plot)
 
 # Prepare metrics data for line plot with multiple y-axes
 metrics_df <- do.call(rbind, lapply(seq_along(result$metrics), function(i) {

From 064fc30f270bdd79eea77ecb20ecbc0c601bff29 Mon Sep 17 00:00:00 2001
From: sambhavnoobcoder <94298612+sambhavnoobcoder@users.noreply.github.com>
Date: Fri, 12 Jul 2024 02:31:40 +0530
Subject: [PATCH 019/155] Updated NA_downscale.Rd with changes with regards to
 CNN implementation

Updated the .Rd file in accordance with the changes made to the original NA_downscale function for the CNN implementation.
---
 modules/assim.sequential/man/NA_downscale.Rd | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/modules/assim.sequential/man/NA_downscale.Rd b/modules/assim.sequential/man/NA_downscale.Rd
index 47ce232b088..6cc3eb09eae 100644
--- a/modules/assim.sequential/man/NA_downscale.Rd
+++ b/modules/assim.sequential/man/NA_downscale.Rd
@@ -4,28 +4,26 @@
 \alias{NA_downscale}
 \title{North America Downscale Function}
 \usage{
-NA_downscale(data, coords, date, C_pool, covariates)
+NA_downscale(preprocessed, date, C_pool, covariates_path)
 }
 \arguments{
-\item{data}{In quotes, file path for .rds containing ensemble data.}
-
-\item{coords}{In quotes, file path for .csv file containing the site coordinates, columns named "lon" and "lat".}
+\item{preprocessed}{, In quotes, prepocessed data returned as an output for passing the raw data to the NA_preprocess function.}
 
 \item{date}{In quotes, if SDA site run, format is yyyy/mm/dd, if NEON, yyyy-mm-dd. Restricted to years within file supplied to 'data'.}
 
 \item{C_pool}{In quotes, carbon pool of interest. Name must match carbon pool name found within file supplied to 'data'.}
 
-\item{covariates}{SpatRaster stack, used as predictors in randomForest. Layers within stack should be named. Recommended that this stack be generated using 'covariates' instructions in assim.sequential/inst folder}
+\item{covariates_path}{SpatRaster stack, used as predictors in randomForest. Layers within stack should be named. Recommended that this stack be generated using 'covariates' instructions in assim.sequential/inst folder}
 }
 \value{
 It returns the `downscale_output` list containing lists for the training and testing data sets, models, and predicted maps for each ensemble member.
 }
 \description{
-This function uses the randomForest model.
+This function uses the Convolutional Neural Network(CNN) model.
 }
 \details{
 This function will downscale forecast data to unmodeled locations using covariates and site locations
 }
 \author{
-Joshua Ploshay
+Joshua Ploshay , Sambhav Dixit
 }

From 04d439f6c1a500272e2d069f387672550508b97b Mon Sep 17 00:00:00 2001
From: sambhavnoobcoder <94298612+sambhavnoobcoder@users.noreply.github.com>
Date: Fri, 12 Jul 2024 02:36:47 +0530
Subject: [PATCH 020/155] Created NA_preprocess.Rd

Created and committed the NA_preprocess.Rd file for the NA_preprocess function, which is used to preprocess the data and perform checks before anything is passed into the NA_downscale function.
---
 modules/assim.sequential/man/NA_preprocess.Rd | 30 +++++++++++++++++++
 1 file changed, 30 insertions(+)
 create mode 100644 modules/assim.sequential/man/NA_preprocess.Rd

diff --git a/modules/assim.sequential/man/NA_preprocess.Rd b/modules/assim.sequential/man/NA_preprocess.Rd
new file mode 100644
index 00000000000..1436581468e
--- /dev/null
+++ b/modules/assim.sequential/man/NA_preprocess.Rd
@@ -0,0 +1,30 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/downscale_function.R
+\name{NA_preprocess}
+\alias{NA_preprocess}
+\alias{preprocess}
+\title{Preprocess Data for Downscaling}
+\usage{
+preprocess(data_path, coords_path, date, C_pool)
+}
+\arguments{
+\item{data_path}{Character. File path for .rds containing ensemble data.}
+
+\item{coords_path}{Character. File path for .csv file containing the site coordinates, with columns named "lon" and "lat".}
+
+\item{date}{Character. If SDA site run, format is yyyy/mm/dd; if NEON, yyyy-mm-dd. Restricted to years within the file supplied to 'data_path'.}
+
+\item{C_pool}{Character. Carbon pool of interest. Name must match the carbon pool name found within the file supplied to 'data_path'.}
+}
+\value{
+A list containing The read .rds data , The cleaned site coordinates ,The extracted and possibly truncated carbon data.
+}
+\description{
+This function reads and checks the input data, ensuring that the required date and carbon pool exist, and that the site coordinates are valid.
+}
+\details{
+This function ensures that the specified date and carbon pool are present in the input data. It also checks the validity of the site coordinates and aligns the number of rows between site coordinates and carbon data.
+}
+\author{
+Sambhav Dixit
+}

From e167ca4dd9dcdb68d8e2f4b7cebbc4ee83458ae7 Mon Sep 17 00:00:00 2001
From: sambhavnoobcoder <94298612+sambhavnoobcoder@users.noreply.github.com>
Date: Fri, 12 Jul 2024 21:16:00 +0530
Subject: [PATCH 021/155] Updated the NA_preprocess to SDA_downscale_preprocess
 , NA_downscale to SDA_downscale

In line with the previous discussion, nothing in this code is specific to North America (NA); it is based on the SDA runs. Following the suggestions made, I have renamed the NA_preprocess function to SDA_downscale_preprocess and NA_downscale to SDA_downscale. The new preprocess name also makes clear that it is the preprocessing step for this particular downscale function.

Future scope:
This change to the code and its roxygen comments will require a corresponding update to the .Rd files, which should be kept in mind.
---
 modules/assim.sequential/R/downscale_function.R | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/modules/assim.sequential/R/downscale_function.R b/modules/assim.sequential/R/downscale_function.R
index 1011aaacf2a..dd631ac0db9 100644
--- a/modules/assim.sequential/R/downscale_function.R
+++ b/modules/assim.sequential/R/downscale_function.R
@@ -1,5 +1,5 @@
 ##' @title Preprocess Data for Downscaling
-##' @name NA_preprocess
+##' @name SDA_downscale_preprocess
 ##' @author Sambhav Dixit
 ##'
 ##' @param data_path Character. File path for .rds containing ensemble data.
@@ -13,7 +13,7 @@
 ##' @return A list containing The read .rds data , The cleaned site coordinates ,The extracted and possibly truncated carbon data.
 
 # Preprocess function to check and clean the data
-NA_preprocess <- function(data_path, coords_path, date, C_pool) {
+SDA_downscale_preprocess <- function(data_path, coords_path, date, C_pool) {
   # Read the input data and site coordinates
   input_data <- readRDS(data_path)
   site_coordinates <- read_csv(coords_path)
@@ -57,7 +57,7 @@ NA_preprocess <- function(data_path, coords_path, date, C_pool) {
 }
 
 ##' @title North America Downscale Function
-##' @name NA_downscale
+##' @name SDA_downscale
 ##' @author Joshua Ploshay , Sambhav Dixit
 ##'
 ##' @param preprocessed , In quotes, prepocessed data returned as an output for passing the raw data to the NA_preprocess function.
@@ -71,7 +71,7 @@ NA_preprocess <- function(data_path, coords_path, date, C_pool) {
 ##' @return It returns the `downscale_output` list containing lists for the training and testing data sets, models, and predicted maps for each ensemble member.
 
 
-NA_downscale <- function(preprocessed, date, C_pool, covariates_path) {
+SDA_downscale <- function(preprocessed, date, C_pool, covariates_path) {
   
   input_data <- preprocessed$input_data
   site_coordinates <- preprocessed$site_coordinates
@@ -211,10 +211,10 @@ date <- " " # Replace with the actual date you want to use
 C_pool <- " " # Replace with the actual carbon pool name you want to use
 
 # Preprocess the data
-preprocessed_data <- preprocess(data_path, coords_path, date, C_pool)
+preprocessed_data <- SDA_downscale_preprocess(data_path, coords_path, date, C_pool)
 
-# Run the NA_downscale function
-result <- NA_downscale(preprocessed_data, date, C_pool, covariates_path)
+# Run the SDA_downscale function
+result <- SDA_downscale(preprocessed_data, date, C_pool, covariates_path)
 
 # Print the result
 print(result)

From e524b5e195b02b78c8876acf7dc9df1ad719b88d Mon Sep 17 00:00:00 2001
From: sambhavnoobcoder <94298612+sambhavnoobcoder@users.noreply.github.com>
Date: Mon, 15 Jul 2024 14:07:17 +0530
Subject: [PATCH 022/155] refactored code leaving only functions in the code

As suggested, the code in the assim.sequential/R folder should contain only functions and does not need the runner code. The runner code has therefore been removed and will eventually be added to the /inst folder, as suggested.
---
 .../assim.sequential/R/downscale_function.R   | 140 ------------------
 1 file changed, 140 deletions(-)

diff --git a/modules/assim.sequential/R/downscale_function.R b/modules/assim.sequential/R/downscale_function.R
index dd631ac0db9..9918d68510c 100644
--- a/modules/assim.sequential/R/downscale_function.R
+++ b/modules/assim.sequential/R/downscale_function.R
@@ -200,143 +200,3 @@ SDA_downscale <- function(preprocessed, date, C_pool, covariates_path) {
   
   return(downscale_output)
 }
-
-# Define file paths for the data
-data_path <- " " # Replace with the actual path to your data file
-coords_path <- " "  # Replace with the actual path to your coordinates file
-covariates_path <- " " # Replace with the actual path to your .tiff file
-
-# Define parameters
-date <- " " # Replace with the actual date you want to use
-C_pool <- " " # Replace with the actual carbon pool name you want to use
-
-# Preprocess the data
-preprocessed_data <- SDA_downscale_preprocess(data_path, coords_path, date, C_pool)
-
-# Run the SDA_downscale function
-result <- SDA_downscale(preprocessed_data, date, C_pool, covariates_path)
-
-# Print the result
-print(result)
-
-# Print the accuracy metrics
-print("Accuracy Metrics for Each Ensemble:")
-for (i in seq_along(result$metrics)) {
-  cat(paste0("Ensemble ", i, ":"))
-  print(result$metrics[[i]])
-}
-
-# Function to create a Taylor diagram
-TaylorDiagram <- function(taylor_data, ...) {
-  # Convert data to the format required by the plot
-  taylor_data$color <- as.factor(taylor_data$Ensemble)
-  
-  plot <- ggplot(taylor_data, aes(x = Modelled, y = Observed, color = color)) +
-    geom_point(size = 3) +
-    scale_color_manual(values = rainbow(length(unique(taylor_data$color)))) +
-    coord_fixed() +
-    theme_minimal() +
-    labs(title = "Taylor Diagram for Ensemble Members",
-         x = "Standard Deviation (normalised)",
-         y = "Standard Deviation (normalised)",
-         color = "Model") +
-    theme(legend.position = "right")
-  
-  return(plot)
-}
-
-# Prepare data for Taylor diagram
-taylor_data <- data.frame()
-
-for (i in seq_along(result$metrics)) {
-  ensemble_name <- names(result$metrics)[i]
-  actual <- result$metrics[[i]]$actual
-  predicted <- result$metrics[[i]]$predicted
-  
-  # Calculate required statistics
-  obs_sd <- sd(actual)
-  mod_sd <- sd(predicted)
-  correlation <- cor(actual, predicted)
-  
-  # Normalize standard deviations
-  norm_obs_sd <- obs_sd / max(obs_sd, mod_sd)
-  norm_mod_sd <- mod_sd / max(obs_sd, mod_sd)
-  
-  # Add to the data frame
-  taylor_data <- rbind(taylor_data, data.frame(
-    Ensemble = ensemble_name,
-    Observed = norm_obs_sd,
-    Modelled = norm_mod_sd,
-    r = correlation
-  ))
-}
-
-# Create and display Taylor diagram
-taylor_plot <- TaylorDiagram(taylor_data)
-print(taylor_plot)
-
-# Prepare metrics data for line plot with multiple y-axes
-metrics_df <- do.call(rbind, lapply(seq_along(result$metrics), function(i) {
-  data.frame(Ensemble = paste0("ensemble", i),
-             MSE = result$metrics[[i]]$MSE,
-             MAE = result$metrics[[i]]$MAE,
-             R_squared = result$metrics[[i]]$R_squared)
-}))
-
-# Reshape data for ggplot
-metrics_melted <- reshape2::melt(metrics_df, id.vars = "Ensemble")
-
-# Create a line plot with multiple y-axes using ggplot2
-p1 <- ggplot(metrics_melted[metrics_melted$variable %in% c("MSE", "MAE"), ], aes(x = Ensemble, y = value, color = variable, group = variable)) +
-  geom_line() +
-  geom_point() +
-  scale_y_continuous(name = "MSE and MAE") +
-  labs(title = "Performance Metrics for Each Ensemble",
-       x = "Ensemble",
-       color = "Metrics") +
-  theme(axis.text.x = element_text(angle = 45, hjust = 1))
-
-p2 <- ggplot(metrics_melted[metrics_melted$variable == "R_squared", ], aes(x = Ensemble, y = value, color = variable, group = variable)) +
-  geom_line() +
-  geom_point() +
-  scale_y_continuous(name = "R_squared") +
-  labs(x = "Ensemble",
-       color = "Metrics") +
-  theme(axis.text.x = element_text(angle = 45, hjust = 1))
-
-# Combine the plots
-grid.arrange(p1, p2, ncol = 1)
-
-# Scatter plot to compare actual and predicted values for each ensemble with one random instance
-set.seed(123)  # For reproducibility
-sampled_data <- do.call(rbind, lapply(seq_along(result$metrics), function(i) {
-  ensemble_name <- names(result$metrics)[i]
-  actual <- result$metrics[[i]]$actual
-  predicted <- result$metrics[[i]]$predicted
-  
-  # Sample one random instance
-  sample_index <- sample(1:length(actual), 1)
-  actual_sample <- actual[sample_index]
-  predicted_sample <- predicted[sample_index]
-  
-  data.frame(Ensemble = ensemble_name, Actual = actual_sample, Predicted = predicted_sample)
-}))
-
-# Create scatter plot with lines connecting actual and predicted values
-p3 <- ggplot(sampled_data, aes(x = Actual, y = Predicted, color = Ensemble)) +
-  geom_point(aes(x = Actual, y = Actual, shape = "Actual"), size = 3) + # Circle for actual values
-  geom_point(aes(x = Predicted, y = Predicted, shape = "Predicted"), size = 3) + # Square for predicted values
-  geom_segment(aes(x = Actual, y = Actual, xend = Actual, yend = Predicted), linetype = "dotted") + # Connect actual to predicted
-  geom_abline(slope = 1, intercept = 0, linetype = "dashed", color = "black") +
-  geom_smooth(method = "lm", linetype = "solid", se = FALSE, color = "blue") + # Regression line
-  labs(title = "Actual vs. Predicted Scatter Plot for Random Samples",
-       x = "Actual",
-       y = "Predicted",
-       color = "Ensemble",
-       shape = "Type") +
-  scale_shape_manual(values = c("Actual" = 16, "Predicted" = 22)) + # Define shapes
-  theme_minimal() +
-  theme(axis.text.x = element_text(angle = 45, hjust = 1))
-
-# Display scatter plot
-print(p3)

From a3a92f2d8db505cb64a2017bd1897b60c2babd36 Mon Sep 17 00:00:00 2001
From: sambhavnoobcoder <94298612+sambhavnoobcoder@users.noreply.github.com>
Date: Mon, 15 Jul 2024 14:58:08 +0530
Subject: [PATCH 023/155] Updated the description of the return type of
 SDA_downscale_preprocess function.

Based on the recent suggestion, it appears more meaningful to replace the phrase "extracted and possibly truncated" with "preprocessed." The original wording is somewhat vague, while the latter term is more precise and appropriate for our context.
Making that change in this commit
---
 modules/assim.sequential/R/downscale_function.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/assim.sequential/R/downscale_function.R b/modules/assim.sequential/R/downscale_function.R
index 9918d68510c..fa9dce85e20 100644
--- a/modules/assim.sequential/R/downscale_function.R
+++ b/modules/assim.sequential/R/downscale_function.R
@@ -10,7 +10,7 @@
 ##'
 ##' @description This function reads and checks the input data, ensuring that the required date and carbon pool exist, and that the site coordinates are valid.
 ##'
-##' @return A list containing The read .rds data , The cleaned site coordinates ,The extracted and possibly truncated carbon data.
+##' @return A list containing The read .rds data , The cleaned site coordinates, and the preprocessed carbon data.
 
 # Preprocess function to check and clean the data
 SDA_downscale_preprocess <- function(data_path, coords_path, date, C_pool) {

From 95e9691b52dfa0249e0cf8401b07419d508141b7 Mon Sep 17 00:00:00 2001
From: Harunobu Ishii <harunobuishii547@gmail.com>
Date: Mon, 15 Jul 2024 16:19:42 -0400
Subject: [PATCH 024/155] changelog updated

---
 CHANGELOG.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 8e69c58ee70..01f0e45d9b4 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -30,6 +30,7 @@ For more information about this file see also [Keep a Changelog](http://keepacha
 - Added new feature of preparing initial conditions for MODIS LAI, AGB, ISCN SOC, and soil moisture across NA anchor sites.
 - Added GEDI AGB preparation workflow.
 - Added new feature of downloading datasets from the NASA DAAC ORNL database.
+- Extended downscale function and created 'downscale_hrly' so that it handles more frequent data
 
 ### Fixed
 

From a214027f721f4b5d7ddade2a5c6f6890ef71261a Mon Sep 17 00:00:00 2001
From: Harunobu Ishii <harunobuishii547@gmail.com>
Date: Mon, 15 Jul 2024 16:30:29 -0400
Subject: [PATCH 025/155] man file created

---
 modules/assim.sequential/NAMESPACE            |  3 ++
 .../R/downscale_function_hrly.R               | 29 ++++++++++---------
 .../assim.sequential/man/NA_downscale_hrly.Rd | 29 +++++++++++++++++++
 3 files changed, 47 insertions(+), 14 deletions(-)
 create mode 100644 modules/assim.sequential/man/NA_downscale_hrly.Rd

diff --git a/modules/assim.sequential/NAMESPACE b/modules/assim.sequential/NAMESPACE
index 8157eadc616..9a06bb92560 100644
--- a/modules/assim.sequential/NAMESPACE
+++ b/modules/assim.sequential/NAMESPACE
@@ -13,6 +13,7 @@ export(GEF.MultiSite)
 export(GEF.MultiSite.Nimble)
 export(GrabFillMatrix)
 export(Local.support)
+export(NA_downscale_hrly)
 export(Obs.data.prepare.MultiSite)
 export(Prep_OBS_SDA)
 export(Remote_Sync_launcher)
@@ -59,7 +60,9 @@ export(tobit_model_censored)
 export(y_star_create)
 import(furrr)
 import(lubridate)
+import(ncdf4)
 import(nimble)
+import(terra)
 importFrom(dplyr,"%>%")
 importFrom(lubridate,"%m+%")
 importFrom(magrittr,"%>%")
diff --git a/modules/assim.sequential/R/downscale_function_hrly.R b/modules/assim.sequential/R/downscale_function_hrly.R
index ff340e16b3c..3d6a10041ce 100644
--- a/modules/assim.sequential/R/downscale_function_hrly.R
+++ b/modules/assim.sequential/R/downscale_function_hrly.R
@@ -1,17 +1,18 @@
-##' @title North America Downscale Function
-##' @name NA_downscale_hrly
-##' @author Harunobu Ishii
-##'
-##' @param nc_data  In quotes, file path for .nc containing ensemble data.
-##' @param coords In quotes, file path for .csv file containing the site coordinates, columns named "lon" and "lat".
-##' @param date In quotes, format is yyyy-mm-dd hh:mm:ss EST. Restricted to time within file supplied to 'data' (hours since 1986-01-01T00:00).
-##' @param covariates SpatRaster stack, used as predictors in randomForest. Layers within stack should be named. Recommended that this stack be generated using 'covariates' instructions in assim.sequential/inst folder
-##' @details This function will downscale forecast data (hourly) to unmodeled locations using covariates and site locations
-##'
-##' @description This function uses the randomForest model.
-##'
-##' @return It returns the `downscale_output` list containing lists for the training and testing data sets, models, and predicted maps for each ensemble member.
-
+#' @title North America Downscale Function
+#' @name NA_downscale_hrly
+#' @author Harunobu Ishii
+#'
+#' @param nc_data  In quotes, file path for .nc containing ensemble data.
+#' @param coords In quotes, file path for .csv file containing the site coordinates, columns named "lon" and "lat".
+#' @param date In quotes, format is yyyy-mm-dd hh:mm:ss EST. Restricted to time within file supplied to 'data' (hours since 1986-01-01T00:00).
+#' @param covariates SpatRaster stack, used as predictors in randomForest. Layers within stack should be named. Recommended that this stack be generated using 'covariates' instructions in assim.sequential/inst folder
+#' @details This function will downscale forecast data (hourly) to unmodeled locations using covariates and site locations
+#'
+#' @description This function uses the randomForest model.
+#' @return It returns the `downscale_output` list containing lists for the training and testing data sets, models, and predicted maps for each ensemble member.
+#' @import terra
+#' @import ncdf4
+#' @export
 
 NA_downscale_hrly <- function(nc_data, coords, date, covariates){
   
diff --git a/modules/assim.sequential/man/NA_downscale_hrly.Rd b/modules/assim.sequential/man/NA_downscale_hrly.Rd
new file mode 100644
index 00000000000..1a8984575c7
--- /dev/null
+++ b/modules/assim.sequential/man/NA_downscale_hrly.Rd
@@ -0,0 +1,29 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/downscale_function_hrly.R
+\name{NA_downscale_hrly}
+\alias{NA_downscale_hrly}
+\title{North America Downscale Function}
+\usage{
+NA_downscale_hrly(nc_data, coords, date, covariates)
+}
+\arguments{
+\item{nc_data}{In quotes, file path for .nc containing ensemble data.}
+
+\item{coords}{In quotes, file path for .csv file containing the site coordinates, columns named "lon" and "lat".}
+
+\item{date}{In quotes, format is yyyy-mm-dd hh:mm:ss EST. Restricted to time within file supplied to 'data' (hours since 1986-01-01T00:00).}
+
+\item{covariates}{SpatRaster stack, used as predictors in randomForest. Layers within stack should be named. Recommended that this stack be generated using 'covariates' instructions in assim.sequential/inst folder}
+}
+\value{
+It returns the `downscale_output` list containing lists for the training and testing data sets, models, and predicted maps for each ensemble member.
+}
+\description{
+This function uses the randomForest model.
+}
+\details{
+This function will downscale forecast data (hourly) to unmodeled locations using covariates and site locations
+}
+\author{
+Harunobu Ishii
+}

From 502bf89310b624dc7bf471e0b39c09b74fe7c509 Mon Sep 17 00:00:00 2001
From: Harunobu Ishii <harunobuishii547@gmail.com>
Date: Mon, 15 Jul 2024 16:36:49 -0400
Subject: [PATCH 026/155] import package list updated

---
 modules/assim.sequential/NAMESPACE                   | 1 -
 modules/assim.sequential/R/downscale_function_hrly.R | 1 -
 2 files changed, 2 deletions(-)

diff --git a/modules/assim.sequential/NAMESPACE b/modules/assim.sequential/NAMESPACE
index 9a06bb92560..1abf9dea8b9 100644
--- a/modules/assim.sequential/NAMESPACE
+++ b/modules/assim.sequential/NAMESPACE
@@ -62,7 +62,6 @@ import(furrr)
 import(lubridate)
 import(ncdf4)
 import(nimble)
-import(terra)
 importFrom(dplyr,"%>%")
 importFrom(lubridate,"%m+%")
 importFrom(magrittr,"%>%")
diff --git a/modules/assim.sequential/R/downscale_function_hrly.R b/modules/assim.sequential/R/downscale_function_hrly.R
index 3d6a10041ce..758ea6af7fb 100644
--- a/modules/assim.sequential/R/downscale_function_hrly.R
+++ b/modules/assim.sequential/R/downscale_function_hrly.R
@@ -10,7 +10,6 @@
 #'
 #' @description This function uses the randomForest model.
 #' @return It returns the `downscale_output` list containing lists for the training and testing data sets, models, and predicted maps for each ensemble member.
-#' @import terra
 #' @import ncdf4
 #' @export
 

From 9226ef55cd377232816b2716852a68539e529b1e Mon Sep 17 00:00:00 2001
From: sambhavnoobcoder <94298612+sambhavnoobcoder@users.noreply.github.com>
Date: Wed, 17 Jul 2024 23:52:51 +0530
Subject: [PATCH 027/155] Update SDA_downscale function to use base R pipe
 operator |>

Key changes:
- Replaced `%>%` with `|>` in the keras_model_sequential() chain
- Updated the model compilation step to use `|>`
- Modified the model fitting step to use `|>`

The SDA_downscale_preprocess function remains unchanged as it did not use any pipe operators.

This update improves code consistency and reduces dependency on external packages by leveraging R's built-in pipe operator. It also aligns the code with modern R programming practices.

Note: No functional changes were made to the algorithm itself; this is purely a syntactic update to improve code style and maintainability.
---
 modules/assim.sequential/R/downscale_function.R | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/modules/assim.sequential/R/downscale_function.R b/modules/assim.sequential/R/downscale_function.R
index fa9dce85e20..8d810e31c67 100644
--- a/modules/assim.sequential/R/downscale_function.R
+++ b/modules/assim.sequential/R/downscale_function.R
@@ -124,21 +124,21 @@ SDA_downscale <- function(preprocessed, date, C_pool, covariates_path) {
     x_test <- array_reshape(x_test, c(nrow(x_test), 1, ncol(x_test)))
     
     # Define the CNN model
-    model <- keras_model_sequential() %>%
-      layer_conv_1d(filters = 64, kernel_size = 1, activation = 'relu', input_shape = c(1, 4)) %>%
-      layer_flatten() %>%
-      layer_dense(units = 64, activation = 'relu') %>%
+    model <- keras_model_sequential() |>
+      layer_conv_1d(filters = 64, kernel_size = 1, activation = 'relu', input_shape = c(1, 4)) |>
+      layer_flatten() |>
+      layer_dense(units = 64, activation = 'relu') |>
       layer_dense(units = 1)
     
     # Compile the model
-    model %>% compile(
+    model |> compile(
       loss = 'mean_squared_error',
       optimizer = optimizer_adam(),
       metrics = c('mean_absolute_error')
     )
     
     # Train the model
-    model %>% fit(
+    model |> fit(
       x = x_train,
       y = y_train,
       epochs = 100,

From f2bab83d06cdc72b738cf110a85ab0d592402a6a Mon Sep 17 00:00:00 2001
From: sambhavnoobcoder <94298612+sambhavnoobcoder@users.noreply.github.com>
Date: Thu, 18 Jul 2024 00:17:03 +0530
Subject: [PATCH 028/155] Add explicit namespaces for non-base functions

This commit improves code clarity and reduces potential naming conflicts
by adding explicit namespaces to all non-base R functions used in the
SDA_downscale_preprocess and SDA_downscale functions.
Key Changes :
- Updated readr::read_csv() for CSV file reading
- Added terra:: namespace to all terra package functions
- Added keras:: namespace to all keras package functions
- Ensures correct function calls from respective packages
- Improves code maintainability and reduces risk of conflicts with other loaded packages
- No functional changes to the code logic or behavior
- Enhances reproducibility by making package dependencies more explicit
---
 .../assim.sequential/R/downscale_function.R   | 32 +++++++++----------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/modules/assim.sequential/R/downscale_function.R b/modules/assim.sequential/R/downscale_function.R
index 8d810e31c67..21cd0e0ca93 100644
--- a/modules/assim.sequential/R/downscale_function.R
+++ b/modules/assim.sequential/R/downscale_function.R
@@ -16,7 +16,7 @@
 SDA_downscale_preprocess <- function(data_path, coords_path, date, C_pool) {
   # Read the input data and site coordinates
   input_data <- readRDS(data_path)
-  site_coordinates <- read_csv(coords_path)
+  site_coordinates <- readr::read_csv(coords_path)
   
   # Ensure the date exists in the input data
   if (!date %in% names(input_data)) {
@@ -81,7 +81,7 @@ SDA_downscale <- function(preprocessed, date, C_pool, covariates_path) {
   site_coordinates <- terra::vect(site_coordinates, geom = c("lon", "lat"), crs = "EPSG:4326")
   
   # Load the covariates raster stack
-  covariates <- rast(covariates_path)
+  covariates <- terra::rast(covariates_path)
   
   # Extract predictors from covariates raster using site coordinates
   predictors <- as.data.frame(terra::extract(covariates, site_coordinates, ID = FALSE))
@@ -120,25 +120,25 @@ SDA_downscale <- function(preprocessed, date, C_pool, covariates_path) {
     x_test <- scale(x_test)
     
     # Reshape data for CNN input (samples, timesteps, features)
-    x_train <- array_reshape(x_train, c(nrow(x_train), 1, ncol(x_train)))
-    x_test <- array_reshape(x_test, c(nrow(x_test), 1, ncol(x_test)))
+    x_train <- keras::array_reshape(x_train, c(nrow(x_train), 1, ncol(x_train)))
+    x_test <- keras::array_reshape(x_test, c(nrow(x_test), 1, ncol(x_test)))
     
     # Define the CNN model
-    model <- keras_model_sequential() |>
-      layer_conv_1d(filters = 64, kernel_size = 1, activation = 'relu', input_shape = c(1, 4)) |>
-      layer_flatten() |>
-      layer_dense(units = 64, activation = 'relu') |>
-      layer_dense(units = 1)
+    model <- keras::keras_model_sequential() |>
+      keras::layer_conv_1d(filters = 64, kernel_size = 1, activation = 'relu', input_shape = c(1, 4)) |>
+      keras::layer_flatten() |>
+      keras::layer_dense(units = 64, activation = 'relu') |>
+      keras::layer_dense(units = 1)
     
     # Compile the model
-    model |> compile(
+    model |> keras::compile(
       loss = 'mean_squared_error',
-      optimizer = optimizer_adam(),
+      optimizer = keras::optimizer_adam(),
       metrics = c('mean_absolute_error')
     )
     
     # Train the model
-    model |> fit(
+    model |> keras::fit(
       x = x_train,
       y = y_train,
       epochs = 100,
@@ -154,8 +154,8 @@ SDA_downscale <- function(preprocessed, date, C_pool, covariates_path) {
   predict_with_model <- function(model, data) {
     data <- as.matrix(data[, c("tavg", "prec", "srad", "vapr")])
     data <- scale(data)
-    data <- array_reshape(data, c(nrow(data), 1, ncol(data)))
-    predictions <- predict(model, data)
+    data <- keras::array_reshape(data, c(nrow(data), 1, ncol(data)))
+    predictions <- keras::predict(model, data)
     return(predictions)
   }
   
@@ -166,10 +166,10 @@ SDA_downscale <- function(preprocessed, date, C_pool, covariates_path) {
     # Prepare data for prediction
     x_pred <- as.matrix(predictors[, c("tavg", "prec", "srad", "vapr")])
     x_pred <- scale(x_pred)
-    x_pred <- array_reshape(x_pred, c(nrow(x_pred), 1, ncol(x_pred)))
+    x_pred <- keras::array_reshape(x_pred, c(nrow(x_pred), 1, ncol(x_pred)))
     
     map_pred <- predict_with_model(cnn_output[[i]], as.data.frame(covariates[]))
-    map_pred <- rast(matrix(map_pred, nrow = nrow(covariates), ncol = ncol(covariates)), ext = ext(covariates), crs = crs(covariates))
+    map_pred <- terra::rast(matrix(map_pred, nrow = nrow(covariates), ncol = ncol(covariates)), ext = terra::ext(covariates), crs = terra::crs(covariates))
     maps[[i]] <- map_pred
     
     # Generate predictions for testing data

From cac3c8eaadcf499428e3a48ef18e2bdf7f70bc08 Mon Sep 17 00:00:00 2001
From: sambhavnoobcoder <94298612+sambhavnoobcoder@users.noreply.github.com>
Date: Thu, 18 Jul 2024 04:17:56 +0530
Subject: [PATCH 029/155] Implement dynamic carbon pool naming in SDA_downscale
 function

Key changes in this commit:

- Replace fixed "carbon_data" column with dynamic naming (paste0(C_pool, "_ens", i))
- Update CNN model training to use specific carbon pool column names
- Modify metrics calculation to use dynamic column names for each ensemble
- Adjust ensemble processing to handle variable carbon pool names

This change allows for more flexible handling of different carbon pools.
Core functionality remains the same, but now supports multiple named carbon pools.
---
 modules/assim.sequential/R/downscale_function.R | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/modules/assim.sequential/R/downscale_function.R b/modules/assim.sequential/R/downscale_function.R
index 21cd0e0ca93..51e70903c8f 100644
--- a/modules/assim.sequential/R/downscale_function.R
+++ b/modules/assim.sequential/R/downscale_function.R
@@ -94,7 +94,7 @@ SDA_downscale <- function(preprocessed, date, C_pool, covariates_path) {
   
   # Rename the carbon_data column for each ensemble member
   for (i in 1:length(ensembles)) {
-    colnames(ensembles[[i]])[1] <- "carbon_data"
+    colnames(ensembles[[i]])[1] <- paste0(C_pool, "_ens", i)
   }
   
   # Split the observations in each data frame into two data frames based on the proportion of 3/4
@@ -111,9 +111,9 @@ SDA_downscale <- function(preprocessed, date, C_pool, covariates_path) {
   for (i in 1:length(ensembles)) {
     # Prepare data for CNN
     x_train <- as.matrix(ensembles[[i]]$training[, c("tavg", "prec", "srad", "vapr")])
-    y_train <- as.matrix(ensembles[[i]]$training$carbon_data)
+    y_train <- as.matrix(ensembles[[i]]$training[[paste0(C_pool, "_ens", i)]])
     x_test <- as.matrix(ensembles[[i]]$testing[, c("tavg", "prec", "srad", "vapr")])
-    y_test <- as.matrix(ensembles[[i]]$testing$carbon_data)
+    y_test <- as.matrix(ensembles[[i]]$testing[[paste0(C_pool, "_ens", i)]])
     
     # Normalize the data
     x_train <- scale(x_train)
@@ -179,7 +179,7 @@ SDA_downscale <- function(preprocessed, date, C_pool, covariates_path) {
   # Calculate performance metrics for each ensemble member
   metrics <- list()
   for (i in 1:length(predictions)) {
-    actual <- ensembles[[i]]$testing$carbon_data
+    actual <- ensembles[[i]]$testing[[paste0(C_pool, "_ens", i)]]
     predicted <- predictions[[i]]
     mse <- mean((actual - predicted)^2)
     mae <- mean(abs(actual - predicted))

From ecd5aa1549b7ac9e885f0f0c353649be1c3f1819 Mon Sep 17 00:00:00 2001
From: sambhavnoobcoder <94298612+sambhavnoobcoder@users.noreply.github.com>
Date: Sat, 20 Jul 2024 21:35:52 +0530
Subject: [PATCH 030/155] Improve data scaling to ensure consistency across
 train and test sets

This commit addresses the issue of separate scaling for training and testing
data, which could lead to inconsistent data representations. The changes
allow for scaling all data together before splitting into train and test
sets, fixing the original query about rescaling data separately.

Extended description:
- Implemented a single scaling operation for all predictor data before
  splitting into train and test sets.
- Use the same scaling parameters across all ensembles to ensure consistency.
- Apply scaling to each ensemble using the global scaling parameters.
- Modified the prediction process to use the same scaling parameters for
  new data.
- Simplified the predict_with_model function as data is now pre-scaled.

These changes ensure that all data (training, testing, and prediction) are
scaled using the same parameters, addressing the potential issue of different
scaling across different subsets of the data. This approach maintains data
consistency throughout the model training and prediction pipeline, leading
to more reliable and comparable results across all stages of the process.
---
 .../assim.sequential/R/downscale_function.R   | 90 +++++++++++--------
 1 file changed, 53 insertions(+), 37 deletions(-)

diff --git a/modules/assim.sequential/R/downscale_function.R b/modules/assim.sequential/R/downscale_function.R
index 51e70903c8f..ae929a46bfe 100644
--- a/modules/assim.sequential/R/downscale_function.R
+++ b/modules/assim.sequential/R/downscale_function.R
@@ -72,31 +72,54 @@ SDA_downscale_preprocess <- function(data_path, coords_path, date, C_pool) {
 
 
 SDA_downscale <- function(preprocessed, date, C_pool, covariates_path) {
-  
   input_data <- preprocessed$input_data
   site_coordinates <- preprocessed$site_coordinates
   carbon_data <- preprocessed$carbon_data
-  
+
   # Convert site coordinates to SpatVector
   site_coordinates <- terra::vect(site_coordinates, geom = c("lon", "lat"), crs = "EPSG:4326")
-  
+
   # Load the covariates raster stack
   covariates <- terra::rast(covariates_path)
-  
+
   # Extract predictors from covariates raster using site coordinates
   predictors <- as.data.frame(terra::extract(covariates, site_coordinates, ID = FALSE))
-  
+
   # Combine each ensemble member with all predictors
   ensembles <- list()
   for (i in seq_along(carbon_data)) {
     ensembles[[i]] <- cbind(carbon_data[[i]], predictors)
   }
-  
+
   # Rename the carbon_data column for each ensemble member
   for (i in 1:length(ensembles)) {
     colnames(ensembles[[i]])[1] <- paste0(C_pool, "_ens", i)
   }
-  
+
+  # Function to scale data
+  scale_data <- function(data, scale_params = NULL) {
+    if (is.null(scale_params)) {
+      scale_params <- list(
+        center = apply(data, 2, mean),
+        scale = apply(data, 2, sd)
+      )
+    }
+    scaled_data <- scale(data, center = scale_params$center, scale = scale_params$scale)
+    return(list(scaled_data = scaled_data, scale_params = scale_params))
+  }
+
+  # Scale all predictor data together
+  all_predictor_data <- do.call(rbind, lapply(ensembles, function(df) df[, c("tavg", "prec", "srad", "vapr")]))
+  scaled_all <- scale_data(all_predictor_data)
+  scale_params <- scaled_all$scale_params
+
+  # Apply scaling to each ensemble
+  for (i in 1:length(ensembles)) {
+    ensembles[[i]][, c("tavg", "prec", "srad", "vapr")] <- scale(ensembles[[i]][, c("tavg", "prec", "srad", "vapr")], 
+                                                                 center = scale_params$center, 
+                                                                 scale = scale_params$scale)
+  }
+
   # Split the observations in each data frame into two data frames based on the proportion of 3/4
   ensembles <- lapply(ensembles, function(df) {
     sample <- sample(1:nrow(df), size = round(0.75 * nrow(df)))
@@ -105,7 +128,7 @@ SDA_downscale <- function(preprocessed, date, C_pool, covariates_path) {
     split_list <- list(training = train, testing = test)
     return(split_list)
   })
-  
+
   # Train a CNN model for each ensemble member using the training data
   cnn_output <- list()
   for (i in 1:length(ensembles)) {
@@ -114,68 +137,61 @@ SDA_downscale <- function(preprocessed, date, C_pool, covariates_path) {
     y_train <- as.matrix(ensembles[[i]]$training[[paste0(C_pool, "_ens", i)]])
     x_test <- as.matrix(ensembles[[i]]$testing[, c("tavg", "prec", "srad", "vapr")])
     y_test <- as.matrix(ensembles[[i]]$testing[[paste0(C_pool, "_ens", i)]])
-    
-    # Normalize the data
-    x_train <- scale(x_train)
-    x_test <- scale(x_test)
-    
+
     # Reshape data for CNN input (samples, timesteps, features)
-    x_train <- keras::array_reshape(x_train, c(nrow(x_train), 1, ncol(x_train)))
-    x_test <- keras::array_reshape(x_test, c(nrow(x_test), 1, ncol(x_test)))
-    
+    x_train_reshaped <- keras::array_reshape(x_train, c(nrow(x_train), 1, ncol(x_train)))
+    x_test_reshaped <- keras::array_reshape(x_test, c(nrow(x_test), 1, ncol(x_test)))
+
     # Define the CNN model
     model <- keras::keras_model_sequential() |>
       keras::layer_conv_1d(filters = 64, kernel_size = 1, activation = 'relu', input_shape = c(1, 4)) |>
       keras::layer_flatten() |>
       keras::layer_dense(units = 64, activation = 'relu') |>
       keras::layer_dense(units = 1)
-    
+
     # Compile the model
     model |> keras::compile(
       loss = 'mean_squared_error',
       optimizer = keras::optimizer_adam(),
       metrics = c('mean_absolute_error')
     )
-    
+
     # Train the model
     model |> keras::fit(
-      x = x_train,
+      x = x_train_reshaped,
       y = y_train,
       epochs = 100,
       batch_size = 32,
       validation_split = 0.2,
       verbose = 0
     )
-    
+
     cnn_output[[i]] <- model
   }
-  
+
   # Wrapper function to apply the trained model
   predict_with_model <- function(model, data) {
-    data <- as.matrix(data[, c("tavg", "prec", "srad", "vapr")])
-    data <- scale(data)
-    data <- keras::array_reshape(data, c(nrow(data), 1, ncol(data)))
-    predictions <- keras::predict(model, data)
+    data_reshaped <- keras::array_reshape(data, c(nrow(data), 1, ncol(data)))
+    predictions <- keras::predict(model, data_reshaped)
     return(predictions)
   }
-  
+
   # Generate predictions (maps) for each ensemble member using the trained models
   maps <- list()
   predictions <- list()
   for (i in 1:length(cnn_output)) {
     # Prepare data for prediction
-    x_pred <- as.matrix(predictors[, c("tavg", "prec", "srad", "vapr")])
-    x_pred <- scale(x_pred)
-    x_pred <- keras::array_reshape(x_pred, c(nrow(x_pred), 1, ncol(x_pred)))
+    x_pred <- as.matrix(as.data.frame(covariates[])[, c("tavg", "prec", "srad", "vapr")])
+    x_pred_scaled <- scale(x_pred, center = scale_params$center, scale = scale_params$scale)
     
-    map_pred <- predict_with_model(cnn_output[[i]], as.data.frame(covariates[]))
+    map_pred <- predict_with_model(cnn_output[[i]], x_pred_scaled)
     map_pred <- terra::rast(matrix(map_pred, nrow = nrow(covariates), ncol = ncol(covariates)), ext = terra::ext(covariates), crs = terra::crs(covariates))
     maps[[i]] <- map_pred
-    
+
     # Generate predictions for testing data
-    predictions[[i]] <- predict_with_model(cnn_output[[i]], ensembles[[i]]$testing)
+    predictions[[i]] <- predict_with_model(cnn_output[[i]], ensembles[[i]]$testing[, c("tavg", "prec", "srad", "vapr")])
   }
-  
+
   # Calculate performance metrics for each ensemble member
   metrics <- list()
   for (i in 1:length(predictions)) {
@@ -186,10 +202,10 @@ SDA_downscale <- function(preprocessed, date, C_pool, covariates_path) {
     r_squared <- 1 - sum((actual - predicted)^2) / sum((actual - mean(actual))^2)
     metrics[[i]] <- list(MSE = mse, MAE = mae, R_squared = r_squared, actual = actual, predicted = predicted)
   }
-  
+
   # Organize the results into a single output list
-  downscale_output <- list(data = ensembles, models = cnn_output, maps = maps, metrics = metrics)
-  
+  downscale_output <- list(data = ensembles, models = cnn_output, maps = maps, metrics = metrics, scale_params = scale_params)
+
   # Rename each element of the output list with appropriate ensemble numbers
   for (i in 1:length(downscale_output$data)) {
     names(downscale_output$data)[i] <- paste0("ensemble", i)
@@ -197,6 +213,6 @@ SDA_downscale <- function(preprocessed, date, C_pool, covariates_path) {
     names(downscale_output$maps)[i] <- paste0("ensemble", i)
     names(downscale_output$metrics)[i] <- paste0("ensemble", i)
   }
-  
+
   return(downscale_output)
 }

From 5ce2339328dec718109f18873c20f936af421fd6 Mon Sep 17 00:00:00 2001
From: sambhavnoobcoder <94298612+sambhavnoobcoder@users.noreply.github.com>
Date: Sat, 20 Jul 2024 22:06:56 +0530
Subject: [PATCH 031/155] Improve date handling in SDA_downscale_preprocess
 function

This commit enhances the flexibility and robustness of date handling
in the SDA_downscale_preprocess function. It addresses the following
issues and queries:

1. Flexible date format handling:
   - Converts input_data names to a standard YYYY-MM-DD format using
     lubridate::ymd(), allowing for various input date formats.
   - Standardizes the input 'date' parameter to the same format.

2. Preservation of non-date names:
   - Uses ifelse() to only convert valid date names, leaving non-date
     names in input_data unchanged.
   - This addresses the concern: "If input_data has names that aren't
     dates, we may not want to overwrite them".

3. Consistent date comparison:
   - Uses the standardized date format for checking and extracting data,
     ensuring consistency regardless of the original input format.

4. Error handling:
   - Maintains existing error checks for date existence and carbon pool
     presence, now using the standardized date format.

These changes make the function more versatile, allowing it to handle
both "yyyy/mm/dd" and "yyyy-mm-dd" formats (among others) while
preserving the integrity of non-date data in the input.
---
 modules/assim.sequential/R/downscale_function.R | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/modules/assim.sequential/R/downscale_function.R b/modules/assim.sequential/R/downscale_function.R
index ae929a46bfe..f35f80b9b7b 100644
--- a/modules/assim.sequential/R/downscale_function.R
+++ b/modules/assim.sequential/R/downscale_function.R
@@ -18,15 +18,26 @@ SDA_downscale_preprocess <- function(data_path, coords_path, date, C_pool) {
   input_data <- readRDS(data_path)
   site_coordinates <- readr::read_csv(coords_path)
   
+  # Convert input_data names to standard date format
+  input_date_names <- suppressWarnings(as.character(lubridate::ymd(names(input_data))))
+  names(input_data) <- ifelse(is.na(input_date_names), 
+                              names(input_data),
+                              input_date_names)
+  
+  # Convert the input date to standard format
+  standard_date <- as.character(lubridate::ymd(date))
+  
   # Ensure the date exists in the input data
-  if (!date %in% names(input_data)) {
+  if (!standard_date %in% names(input_data)) {
     stop(paste("Date", date, "not found in the input data."))
   }
   
   # Extract the carbon data for the specified focus year
-  index <- which(names(input_data) == date)
+  index <- which(names(input_data) == standard_date)
   data <- input_data[[index]]
   
+  # Rest of the function remains the same...
+  
   # Ensure the carbon pool exists in the input data
   if (!C_pool %in% names(data)) {
     stop(paste("Carbon pool", C_pool, "not found in the input data."))

From bd7cfa554354debb9553fd7badb790df0fb9647e Mon Sep 17 00:00:00 2001
From: sambhavnoobcoder <94298612+sambhavnoobcoder@users.noreply.github.com>
Date: Sun, 21 Jul 2024 03:36:25 +0530
Subject: [PATCH 032/155] Refactor SDA_downscale function to accept covariates
 as direct input

- Change covariates parameter from file path to raster object
- Remove terra::rast() call for loading covariates within the function
- Minor adjustments to data preprocessing for improved consistency
---
 .../assim.sequential/R/downscale_function.R   | 116 ++++++++----------
 1 file changed, 51 insertions(+), 65 deletions(-)

diff --git a/modules/assim.sequential/R/downscale_function.R b/modules/assim.sequential/R/downscale_function.R
index f35f80b9b7b..db4d582ddb9 100644
--- a/modules/assim.sequential/R/downscale_function.R
+++ b/modules/assim.sequential/R/downscale_function.R
@@ -82,55 +82,28 @@ SDA_downscale_preprocess <- function(data_path, coords_path, date, C_pool) {
 ##' @return It returns the `downscale_output` list containing lists for the training and testing data sets, models, and predicted maps for each ensemble member.
 
 
-SDA_downscale <- function(preprocessed, date, C_pool, covariates_path) {
+SDA_downscale <- function(preprocessed, date, C_pool, covariates) {
   input_data <- preprocessed$input_data
   site_coordinates <- preprocessed$site_coordinates
   carbon_data <- preprocessed$carbon_data
-
+  
   # Convert site coordinates to SpatVector
   site_coordinates <- terra::vect(site_coordinates, geom = c("lon", "lat"), crs = "EPSG:4326")
-
-  # Load the covariates raster stack
-  covariates <- terra::rast(covariates_path)
-
+  
   # Extract predictors from covariates raster using site coordinates
   predictors <- as.data.frame(terra::extract(covariates, site_coordinates, ID = FALSE))
-
+  
   # Combine each ensemble member with all predictors
   ensembles <- list()
   for (i in seq_along(carbon_data)) {
     ensembles[[i]] <- cbind(carbon_data[[i]], predictors)
   }
-
+  
   # Rename the carbon_data column for each ensemble member
   for (i in 1:length(ensembles)) {
     colnames(ensembles[[i]])[1] <- paste0(C_pool, "_ens", i)
   }
-
-  # Function to scale data
-  scale_data <- function(data, scale_params = NULL) {
-    if (is.null(scale_params)) {
-      scale_params <- list(
-        center = apply(data, 2, mean),
-        scale = apply(data, 2, sd)
-      )
-    }
-    scaled_data <- scale(data, center = scale_params$center, scale = scale_params$scale)
-    return(list(scaled_data = scaled_data, scale_params = scale_params))
-  }
-
-  # Scale all predictor data together
-  all_predictor_data <- do.call(rbind, lapply(ensembles, function(df) df[, c("tavg", "prec", "srad", "vapr")]))
-  scaled_all <- scale_data(all_predictor_data)
-  scale_params <- scaled_all$scale_params
-
-  # Apply scaling to each ensemble
-  for (i in 1:length(ensembles)) {
-    ensembles[[i]][, c("tavg", "prec", "srad", "vapr")] <- scale(ensembles[[i]][, c("tavg", "prec", "srad", "vapr")], 
-                                                                 center = scale_params$center, 
-                                                                 scale = scale_params$scale)
-  }
-
+  
   # Split the observations in each data frame into two data frames based on the proportion of 3/4
   ensembles <- lapply(ensembles, function(df) {
     sample <- sample(1:nrow(df), size = round(0.75 * nrow(df)))
@@ -139,70 +112,82 @@ SDA_downscale <- function(preprocessed, date, C_pool, covariates_path) {
     split_list <- list(training = train, testing = test)
     return(split_list)
   })
-
+  
   # Train a CNN model for each ensemble member using the training data
   cnn_output <- list()
+  scaling_params <- list()
   for (i in 1:length(ensembles)) {
     # Prepare data for CNN
     x_train <- as.matrix(ensembles[[i]]$training[, c("tavg", "prec", "srad", "vapr")])
     y_train <- as.matrix(ensembles[[i]]$training[[paste0(C_pool, "_ens", i)]])
     x_test <- as.matrix(ensembles[[i]]$testing[, c("tavg", "prec", "srad", "vapr")])
     y_test <- as.matrix(ensembles[[i]]$testing[[paste0(C_pool, "_ens", i)]])
-
+    
+    # Calculate scaling parameters from training data
+    scaling_params[[i]] <- list(
+      mean = colMeans(x_train),
+      sd = apply(x_train, 2, sd)
+    )
+    
+    # Normalize the data using training data parameters
+    x_train <- scale(x_train, center = scaling_params[[i]]$mean, scale = scaling_params[[i]]$sd)
+    x_test <- scale(x_test, center = scaling_params[[i]]$mean, scale = scaling_params[[i]]$sd)
+    
     # Reshape data for CNN input (samples, timesteps, features)
-    x_train_reshaped <- keras::array_reshape(x_train, c(nrow(x_train), 1, ncol(x_train)))
-    x_test_reshaped <- keras::array_reshape(x_test, c(nrow(x_test), 1, ncol(x_test)))
-
+    x_train <- array_reshape(x_train, c(nrow(x_train), 1, ncol(x_train)))
+    x_test <- array_reshape(x_test, c(nrow(x_test), 1, ncol(x_test)))
+    
     # Define the CNN model
-    model <- keras::keras_model_sequential() |>
-      keras::layer_conv_1d(filters = 64, kernel_size = 1, activation = 'relu', input_shape = c(1, 4)) |>
-      keras::layer_flatten() |>
-      keras::layer_dense(units = 64, activation = 'relu') |>
-      keras::layer_dense(units = 1)
-
+    model <- keras_model_sequential() |>
+      layer_conv_1d(filters = 64, kernel_size = 1, activation = 'relu', input_shape = c(1, 4)) |>
+      layer_flatten() |>
+      layer_dense(units = 64, activation = 'relu') |>
+      layer_dense(units = 1)
+    
     # Compile the model
-    model |> keras::compile(
+    model |> compile(
       loss = 'mean_squared_error',
-      optimizer = keras::optimizer_adam(),
+      optimizer = optimizer_adam(),
       metrics = c('mean_absolute_error')
     )
-
+    
     # Train the model
-    model |> keras::fit(
-      x = x_train_reshaped,
+    model |> fit(
+      x = x_train,
       y = y_train,
       epochs = 100,
       batch_size = 32,
       validation_split = 0.2,
       verbose = 0
     )
-
+    
     cnn_output[[i]] <- model
   }
-
+  
   # Wrapper function to apply the trained model
-  predict_with_model <- function(model, data) {
-    data_reshaped <- keras::array_reshape(data, c(nrow(data), 1, ncol(data)))
-    predictions <- keras::predict(model, data_reshaped)
+  predict_with_model <- function(model, data, scaling_params) {
+    data <- as.matrix(data[, c("tavg", "prec", "srad", "vapr")])
+    data <- scale(data, center = scaling_params$mean, scale = scaling_params$sd)
+    data <- array_reshape(data, c(nrow(data), 1, ncol(data)))
+    predictions <- predict(model, data)
     return(predictions)
   }
-
+  
   # Generate predictions (maps) for each ensemble member using the trained models
   maps <- list()
   predictions <- list()
   for (i in 1:length(cnn_output)) {
     # Prepare data for prediction
-    x_pred <- as.matrix(as.data.frame(covariates[])[, c("tavg", "prec", "srad", "vapr")])
-    x_pred_scaled <- scale(x_pred, center = scale_params$center, scale = scale_params$scale)
+    x_pred <- as.matrix(predictors[, c("tavg", "prec", "srad", "vapr")])
     
-    map_pred <- predict_with_model(cnn_output[[i]], x_pred_scaled)
+    map_pred <- predict_with_model(cnn_output[[i]], as.data.frame(covariates[]), scaling_params[[i]])
     map_pred <- terra::rast(matrix(map_pred, nrow = nrow(covariates), ncol = ncol(covariates)), ext = terra::ext(covariates), crs = terra::crs(covariates))
     maps[[i]] <- map_pred
-
+    
     # Generate predictions for testing data
-    predictions[[i]] <- predict_with_model(cnn_output[[i]], ensembles[[i]]$testing[, c("tavg", "prec", "srad", "vapr")])
+    predictions[[i]] <- predict_with_model(cnn_output[[i]], ensembles[[i]]$testing, scaling_params[[i]])
   }
-
+  
   # Calculate performance metrics for each ensemble member
   metrics <- list()
   for (i in 1:length(predictions)) {
@@ -213,17 +198,18 @@ SDA_downscale <- function(preprocessed, date, C_pool, covariates_path) {
     r_squared <- 1 - sum((actual - predicted)^2) / sum((actual - mean(actual))^2)
     metrics[[i]] <- list(MSE = mse, MAE = mae, R_squared = r_squared, actual = actual, predicted = predicted)
   }
-
+  
   # Organize the results into a single output list
-  downscale_output <- list(data = ensembles, models = cnn_output, maps = maps, metrics = metrics, scale_params = scale_params)
-
+  downscale_output <- list(data = ensembles, models = cnn_output, maps = maps, metrics = metrics, scaling_params = scaling_params)
+  
   # Rename each element of the output list with appropriate ensemble numbers
   for (i in 1:length(downscale_output$data)) {
     names(downscale_output$data)[i] <- paste0("ensemble", i)
     names(downscale_output$models)[i] <- paste0("ensemble", i)
     names(downscale_output$maps)[i] <- paste0("ensemble", i)
     names(downscale_output$metrics)[i] <- paste0("ensemble", i)
+    names(downscale_output$scaling_params)[i] <- paste0("ensemble", i)
   }
-
+  
   return(downscale_output)
 }

From a870b937757bcb53f643bc06583131c01bad558a Mon Sep 17 00:00:00 2001
From: sambhavnoobcoder <94298612+sambhavnoobcoder@users.noreply.github.com>
Date: Sun, 21 Jul 2024 03:43:53 +0530
Subject: [PATCH 033/155] Updated description for SDA_downscale parameters

In accordance with the previous change, SDA_downscale now uses the covariates
object passed in as an argument instead of the covariates_path passed earlier.
The parameter documentation in the code has been updated to reflect this change.
---
 modules/assim.sequential/R/downscale_function.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/assim.sequential/R/downscale_function.R b/modules/assim.sequential/R/downscale_function.R
index db4d582ddb9..ce7c0ef0adb 100644
--- a/modules/assim.sequential/R/downscale_function.R
+++ b/modules/assim.sequential/R/downscale_function.R
@@ -74,7 +74,7 @@ SDA_downscale_preprocess <- function(data_path, coords_path, date, C_pool) {
 ##' @param preprocessed , In quotes, prepocessed data returned as an output for passing the raw data to the NA_preprocess function.
 ##' @param date In quotes, if SDA site run, format is yyyy/mm/dd, if NEON, yyyy-mm-dd. Restricted to years within file supplied to 'data'.
 ##' @param C_pool In quotes, carbon pool of interest. Name must match carbon pool name found within file supplied to 'data'.
-##' @param covariates_path SpatRaster stack, used as predictors in randomForest. Layers within stack should be named. Recommended that this stack be generated using 'covariates' instructions in assim.sequential/inst folder
+##' @param covariates SpatRaster stack, used as predictors in CNN. Layers within stack should be named. Recommended that this stack be generated using 'covariates' instructions in assim.sequential/inst folder
 ##' @details This function will downscale forecast data to unmodeled locations using covariates and site locations
 ##'
 ##' @description This function uses the Convolutional Neural Network(CNN) model.

From ca14c09f55aa088c741b23098e4c423492192f22 Mon Sep 17 00:00:00 2001
From: sambhavnoobcoder <94298612+sambhavnoobcoder@users.noreply.github.com>
Date: Sun, 21 Jul 2024 04:24:36 +0530
Subject: [PATCH 034/155] Renaming variables according to nomenclature
 standards

Existing code and the project's standard variable names were used as the basis
for refactoring the variable names (e.g. C_pool becomes carbon_pool), keeping
them consistent with the rest of the project and improving the readability of
the code.
---
 .../assim.sequential/R/downscale_function.R    | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/modules/assim.sequential/R/downscale_function.R b/modules/assim.sequential/R/downscale_function.R
index ce7c0ef0adb..3b2fff94782 100644
--- a/modules/assim.sequential/R/downscale_function.R
+++ b/modules/assim.sequential/R/downscale_function.R
@@ -13,7 +13,7 @@
 ##' @return A list containing The read .rds data , The cleaned site coordinates, and the preprocessed carbon data.
 
 # Preprocess function to check and clean the data
-SDA_downscale_preprocess <- function(data_path, coords_path, date, C_pool) {
+SDA_downscale_preprocess <- function(data_path, coords_path, date, carbon_pool) {
   # Read the input data and site coordinates
   input_data <- readRDS(data_path)
   site_coordinates <- readr::read_csv(coords_path)
@@ -39,11 +39,11 @@ SDA_downscale_preprocess <- function(data_path, coords_path, date, C_pool) {
   # Rest of the function remains the same...
   
   # Ensure the carbon pool exists in the input data
-  if (!C_pool %in% names(data)) {
-    stop(paste("Carbon pool", C_pool, "not found in the input data."))
+  if (!carbon_pool %in% names(data)) {
+    stop(paste("Carbon pool", carbon_pool, "not found in the input data."))
   }
   
-  carbon_data <- as.data.frame(t(data[which(names(data) == C_pool)]))
+  carbon_data <- as.data.frame(t(data[which(names(data) == carbon_pool)]))
   names(carbon_data) <- paste0("ensemble", seq(ncol(carbon_data)))
   
   # Ensure site coordinates have 'lon' and 'lat' columns
@@ -82,7 +82,7 @@ SDA_downscale_preprocess <- function(data_path, coords_path, date, C_pool) {
 ##' @return It returns the `downscale_output` list containing lists for the training and testing data sets, models, and predicted maps for each ensemble member.
 
 
-SDA_downscale <- function(preprocessed, date, C_pool, covariates) {
+SDA_downscale <- function(preprocessed, date, carbon_pool, covariates) {
   input_data <- preprocessed$input_data
   site_coordinates <- preprocessed$site_coordinates
   carbon_data <- preprocessed$carbon_data
@@ -101,7 +101,7 @@ SDA_downscale <- function(preprocessed, date, C_pool, covariates) {
   
   # Rename the carbon_data column for each ensemble member
   for (i in 1:length(ensembles)) {
-    colnames(ensembles[[i]])[1] <- paste0(C_pool, "_ens", i)
+    colnames(ensembles[[i]])[1] <- paste0(carbon_pool, "_ens", i)
   }
   
   # Split the observations in each data frame into two data frames based on the proportion of 3/4
@@ -119,9 +119,9 @@ SDA_downscale <- function(preprocessed, date, C_pool, covariates) {
   for (i in 1:length(ensembles)) {
     # Prepare data for CNN
     x_train <- as.matrix(ensembles[[i]]$training[, c("tavg", "prec", "srad", "vapr")])
-    y_train <- as.matrix(ensembles[[i]]$training[[paste0(C_pool, "_ens", i)]])
+    y_train <- as.matrix(ensembles[[i]]$training[[paste0(carbon_pool, "_ens", i)]])
     x_test <- as.matrix(ensembles[[i]]$testing[, c("tavg", "prec", "srad", "vapr")])
-    y_test <- as.matrix(ensembles[[i]]$testing[[paste0(C_pool, "_ens", i)]])
+    y_test <- as.matrix(ensembles[[i]]$testing[[paste0(carbon_pool, "_ens", i)]])
     
     # Calculate scaling parameters from training data
     scaling_params[[i]] <- list(
@@ -191,7 +191,7 @@ SDA_downscale <- function(preprocessed, date, C_pool, covariates) {
   # Calculate performance metrics for each ensemble member
   metrics <- list()
   for (i in 1:length(predictions)) {
-    actual <- ensembles[[i]]$testing[[paste0(C_pool, "_ens", i)]]
+    actual <- ensembles[[i]]$testing[[paste0(carbon_pool, "_ens", i)]]
     predicted <- predictions[[i]]
     mse <- mean((actual - predicted)^2)
     mae <- mean(abs(actual - predicted))

From 832801f1c3a3edcda1dc05308881e8e1b6fc8831 Mon Sep 17 00:00:00 2001
From: sambhavnoobcoder <94298612+sambhavnoobcoder@users.noreply.github.com>
Date: Sun, 21 Jul 2024 04:30:02 +0530
Subject: [PATCH 035/155] Updated documentation wrt variable nomenclature
 change

Following the previous commit (ca14c09), the documentation in the code was
updated to use the new variable names, in line with the project's standardised
variable nomenclature.
---
 modules/assim.sequential/R/downscale_function.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/modules/assim.sequential/R/downscale_function.R b/modules/assim.sequential/R/downscale_function.R
index 3b2fff94782..a63a1a6b08a 100644
--- a/modules/assim.sequential/R/downscale_function.R
+++ b/modules/assim.sequential/R/downscale_function.R
@@ -5,7 +5,7 @@
 ##' @param data_path Character. File path for .rds containing ensemble data.
 ##' @param coords_path Character. File path for .csv file containing the site coordinates, with columns named "lon" and "lat".
 ##' @param date Character. If SDA site run, format is yyyy/mm/dd; if NEON, yyyy-mm-dd. Restricted to years within the file supplied to 'data_path'.
-##' @param C_pool Character. Carbon pool of interest. Name must match the carbon pool name found within the file supplied to 'data_path'.
+##' @param carbon_pool Character. Carbon pool of interest. Name must match the carbon pool name found within the file supplied to 'data_path'.
 ##' @details This function ensures that the specified date and carbon pool are present in the input data. It also checks the validity of the site coordinates and aligns the number of rows between site coordinates and carbon data.
 ##'
 ##' @description This function reads and checks the input data, ensuring that the required date and carbon pool exist, and that the site coordinates are valid.
@@ -73,7 +73,7 @@ SDA_downscale_preprocess <- function(data_path, coords_path, date, carbon_pool)
 ##'
 ##' @param preprocessed , In quotes, prepocessed data returned as an output for passing the raw data to the NA_preprocess function.
 ##' @param date In quotes, if SDA site run, format is yyyy/mm/dd, if NEON, yyyy-mm-dd. Restricted to years within file supplied to 'data'.
-##' @param C_pool In quotes, carbon pool of interest. Name must match carbon pool name found within file supplied to 'data'.
+##' @param carbon_pool In quotes, carbon pool of interest. Name must match carbon pool name found within file supplied to 'data'.
 ##' @param covariates SpatRaster stack, used as predictors in CNN. Layers within stack should be named. Recommended that this stack be generated using 'covariates' instructions in assim.sequential/inst folder
 ##' @details This function will downscale forecast data to unmodeled locations using covariates and site locations
 ##'

From ce4a5974f2b2bea5f7bbc4e6cc0bd2ddbc15f5e4 Mon Sep 17 00:00:00 2001
From: sambhavnoobcoder <94298612+sambhavnoobcoder@users.noreply.github.com>
Date: Sun, 21 Jul 2024 22:32:04 +0530
Subject: [PATCH 036/155] Add model selection feature to SDA_downscale function

This commit introduces a new parameter 'model_type' to the SDA_downscale function,
allowing users to choose between Random Forest (RF) and Convolutional Neural Network (CNN)
models for downscaling.

Extended description:
- Added a new 'model_type' parameter to SDA_downscale function
- Implemented conditional logic to execute either RF or CNN based on user selection
- Preserved the original Random Forest implementation
- Integrated the existing CNN implementation
- Adjusted the function to handle different data preprocessing requirements for each model
- Modified the output structure to accommodate model-specific results
- Added error handling for invalid model type selections
- Ensured consistent naming conventions for ensemble outputs across both model types
- Retained all existing functionality while adding the new model selection feature

This change enhances the flexibility of the downscaling process, allowing users to
leverage either Random Forest or CNN methodologies based on their specific needs
or preferences. The implementation maintains backward compatibility with existing
RF-based workflows while introducing the option to use CNN for potentially improved
performance in certain scenarios.
---
 .../assim.sequential/R/downscale_function.R   | 273 +++++++++++-------
 1 file changed, 162 insertions(+), 111 deletions(-)

diff --git a/modules/assim.sequential/R/downscale_function.R b/modules/assim.sequential/R/downscale_function.R
index a63a1a6b08a..4af676527af 100644
--- a/modules/assim.sequential/R/downscale_function.R
+++ b/modules/assim.sequential/R/downscale_function.R
@@ -82,7 +82,7 @@ SDA_downscale_preprocess <- function(data_path, coords_path, date, carbon_pool)
 ##' @return It returns the `downscale_output` list containing lists for the training and testing data sets, models, and predicted maps for each ensemble member.
 
 
-SDA_downscale <- function(preprocessed, date, carbon_pool, covariates) {
+SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_type = "rf") {
   input_data <- preprocessed$input_data
   site_coordinates <- preprocessed$site_coordinates
   carbon_data <- preprocessed$carbon_data
@@ -99,116 +99,167 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates) {
     ensembles[[i]] <- cbind(carbon_data[[i]], predictors)
   }
   
-  # Rename the carbon_data column for each ensemble member
-  for (i in 1:length(ensembles)) {
-    colnames(ensembles[[i]])[1] <- paste0(carbon_pool, "_ens", i)
-  }
-  
-  # Split the observations in each data frame into two data frames based on the proportion of 3/4
-  ensembles <- lapply(ensembles, function(df) {
-    sample <- sample(1:nrow(df), size = round(0.75 * nrow(df)))
-    train <- df[sample, ]
-    test <- df[-sample, ]
-    split_list <- list(training = train, testing = test)
-    return(split_list)
-  })
-  
-  # Train a CNN model for each ensemble member using the training data
-  cnn_output <- list()
-  scaling_params <- list()
-  for (i in 1:length(ensembles)) {
-    # Prepare data for CNN
-    x_train <- as.matrix(ensembles[[i]]$training[, c("tavg", "prec", "srad", "vapr")])
-    y_train <- as.matrix(ensembles[[i]]$training[[paste0(carbon_pool, "_ens", i)]])
-    x_test <- as.matrix(ensembles[[i]]$testing[, c("tavg", "prec", "srad", "vapr")])
-    y_test <- as.matrix(ensembles[[i]]$testing[[paste0(carbon_pool, "_ens", i)]])
-    
-    # Calculate scaling parameters from training data
-    scaling_params[[i]] <- list(
-      mean = colMeans(x_train),
-      sd = apply(x_train, 2, sd)
-    )
-    
-    # Normalize the data using training data parameters
-    x_train <- scale(x_train, center = scaling_params[[i]]$mean, scale = scaling_params[[i]]$sd)
-    x_test <- scale(x_test, center = scaling_params[[i]]$mean, scale = scaling_params[[i]]$sd)
-    
-    # Reshape data for CNN input (samples, timesteps, features)
-    x_train <- array_reshape(x_train, c(nrow(x_train), 1, ncol(x_train)))
-    x_test <- array_reshape(x_test, c(nrow(x_test), 1, ncol(x_test)))
-    
-    # Define the CNN model
-    model <- keras_model_sequential() |>
-      layer_conv_1d(filters = 64, kernel_size = 1, activation = 'relu', input_shape = c(1, 4)) |>
-      layer_flatten() |>
-      layer_dense(units = 64, activation = 'relu') |>
-      layer_dense(units = 1)
-    
-    # Compile the model
-    model |> compile(
-      loss = 'mean_squared_error',
-      optimizer = optimizer_adam(),
-      metrics = c('mean_absolute_error')
-    )
-    
-    # Train the model
-    model |> fit(
-      x = x_train,
-      y = y_train,
-      epochs = 100,
-      batch_size = 32,
-      validation_split = 0.2,
-      verbose = 0
-    )
-    
-    cnn_output[[i]] <- model
-  }
-  
-  # Wrapper function to apply the trained model
-  predict_with_model <- function(model, data, scaling_params) {
-    data <- as.matrix(data[, c("tavg", "prec", "srad", "vapr")])
-    data <- scale(data, center = scaling_params$mean, scale = scaling_params$sd)
-    data <- array_reshape(data, c(nrow(data), 1, ncol(data)))
-    predictions <- predict(model, data)
-    return(predictions)
-  }
-  
-  # Generate predictions (maps) for each ensemble member using the trained models
-  maps <- list()
-  predictions <- list()
-  for (i in 1:length(cnn_output)) {
-    # Prepare data for prediction
-    x_pred <- as.matrix(predictors[, c("tavg", "prec", "srad", "vapr")])
-    
-    map_pred <- predict_with_model(cnn_output[[i]], as.data.frame(covariates[]), scaling_params[[i]])
-    map_pred <- terra::rast(matrix(map_pred, nrow = nrow(covariates), ncol = ncol(covariates)), ext = terra::ext(covariates), crs = terra::crs(covariates))
-    maps[[i]] <- map_pred
-    
-    # Generate predictions for testing data
-    predictions[[i]] <- predict_with_model(cnn_output[[i]], ensembles[[i]]$testing, scaling_params[[i]])
-  }
-  
-  # Calculate performance metrics for each ensemble member
-  metrics <- list()
-  for (i in 1:length(predictions)) {
-    actual <- ensembles[[i]]$testing[[paste0(carbon_pool, "_ens", i)]]
-    predicted <- predictions[[i]]
-    mse <- mean((actual - predicted)^2)
-    mae <- mean(abs(actual - predicted))
-    r_squared <- 1 - sum((actual - predicted)^2) / sum((actual - mean(actual))^2)
-    metrics[[i]] <- list(MSE = mse, MAE = mae, R_squared = r_squared, actual = actual, predicted = predicted)
-  }
-  
-  # Organize the results into a single output list
-  downscale_output <- list(data = ensembles, models = cnn_output, maps = maps, metrics = metrics, scaling_params = scaling_params)
-  
-  # Rename each element of the output list with appropriate ensemble numbers
-  for (i in 1:length(downscale_output$data)) {
-    names(downscale_output$data)[i] <- paste0("ensemble", i)
-    names(downscale_output$models)[i] <- paste0("ensemble", i)
-    names(downscale_output$maps)[i] <- paste0("ensemble", i)
-    names(downscale_output$metrics)[i] <- paste0("ensemble", i)
-    names(downscale_output$scaling_params)[i] <- paste0("ensemble", i)
+  if (model_type == "rf") {
+    # Rename the carbon_data column for each ensemble member
+    for (i in 1:length(ensembles)) {
+      ensembles[[i]] <- dplyr::rename(ensembles[[i]], "carbon_data" = "carbon_data[[i]]")
+    }
+    
+    # Split the observations in each data frame into two data frames based on the proportion of 3/4
+    ensembles <- lapply(ensembles, function(df) {
+      sample <- sample(1:nrow(df), size = round(0.75*nrow(df)))
+      train  <- df[sample, ]
+      test   <- df[-sample, ]
+      split_list <- list(train, test)
+      return(split_list)
+    })
+    
+    # Rename the training and testing data frames for each ensemble member
+    for (i in 1:length(ensembles)) {
+      names(ensembles[[i]]) <- c("training", "testing")
+    }
+    
+    # Train a random forest model for each ensemble member using the training data
+    rf_output <- list()
+    for (i in 1:length(ensembles)) {
+      rf_output[[i]] <- randomForest::randomForest(ensembles[[i]][[1]][["carbon_data"]] ~ tavg+prec+srad+vapr,
+                                                   data = ensembles[[i]][[1]],
+                                                   ntree = 1000,
+                                                   na.action = stats::na.omit,
+                                                   keep.forest = T,
+                                                   importance = T)
+    }
+    
+    # Generate predictions (maps) for each ensemble member using the trained models
+    maps <- list(ncol(rf_output))
+    for (i in 1:length(rf_output)) {
+      maps[[i]] <- terra::predict(object = covariates,
+                                  model = rf_output[[i]], na.rm = T)
+    }
+    
+    # Organize the results into a single output list
+    downscale_output <- list(ensembles, rf_output, maps)
+    
+    # Rename each element of the output list with appropriate ensemble numbers
+    for (i in 1:length(downscale_output)) {
+      names(downscale_output[[i]]) <- paste0("ensemble", seq(1:length(downscale_output[[i]])))
+    }
+    
+    # Rename the main components of the output list
+    names(downscale_output) <- c("data", "models", "maps")
+    
+  } else if (model_type == "cnn") {
+    # Rename the carbon_data column for each ensemble member
+    for (i in 1:length(ensembles)) {
+      colnames(ensembles[[i]])[1] <- paste0(carbon_pool, "_ens", i)
+    }
+    
+    # Split the observations in each data frame into two data frames based on the proportion of 3/4
+    ensembles <- lapply(ensembles, function(df) {
+      sample <- sample(1:nrow(df), size = round(0.75 * nrow(df)))
+      train <- df[sample, ]
+      test <- df[-sample, ]
+      split_list <- list(training = train, testing = test)
+      return(split_list)
+    })
+    
+    # Train a CNN model for each ensemble member using the training data
+    cnn_output <- list()
+    scaling_params <- list()
+    for (i in 1:length(ensembles)) {
+      # Prepare data for CNN
+      x_train <- as.matrix(ensembles[[i]]$training[, c("tavg", "prec", "srad", "vapr")])
+      y_train <- as.matrix(ensembles[[i]]$training[[paste0(carbon_pool, "_ens", i)]])
+      x_test <- as.matrix(ensembles[[i]]$testing[, c("tavg", "prec", "srad", "vapr")])
+      y_test <- as.matrix(ensembles[[i]]$testing[[paste0(carbon_pool, "_ens", i)]])
+      
+      # Calculate scaling parameters from training data
+      scaling_params[[i]] <- list(
+        mean = colMeans(x_train),
+        sd = apply(x_train, 2, sd)
+      )
+      
+      # Normalize the data using training data parameters
+      x_train <- scale(x_train, center = scaling_params[[i]]$mean, scale = scaling_params[[i]]$sd)
+      x_test <- scale(x_test, center = scaling_params[[i]]$mean, scale = scaling_params[[i]]$sd)
+      
+      # Reshape data for CNN input (samples, timesteps, features)
+      x_train <- array_reshape(x_train, c(nrow(x_train), 1, ncol(x_train)))
+      x_test <- array_reshape(x_test, c(nrow(x_test), 1, ncol(x_test)))
+      
+      # Define the CNN model
+      model <- keras_model_sequential() |>
+        layer_conv_1d(filters = 64, kernel_size = 1, activation = 'relu', input_shape = c(1, 4)) |>
+        layer_flatten() |>
+        layer_dense(units = 64, activation = 'relu') |>
+        layer_dense(units = 1)
+      
+      # Compile the model
+      model |> compile(
+        loss = 'mean_squared_error',
+        optimizer = optimizer_adam(),
+        metrics = c('mean_absolute_error')
+      )
+      
+      # Train the model
+      model |> fit(
+        x = x_train,
+        y = y_train,
+        epochs = 100,
+        batch_size = 32,
+        validation_split = 0.2,
+        verbose = 0
+      )
+      
+      cnn_output[[i]] <- model
+    }
+    
+    # Wrapper function to apply the trained model
+    predict_with_model <- function(model, data, scaling_params) {
+      data <- as.matrix(data[, c("tavg", "prec", "srad", "vapr")])
+      data <- scale(data, center = scaling_params$mean, scale = scaling_params$sd)
+      data <- array_reshape(data, c(nrow(data), 1, ncol(data)))
+      predictions <- predict(model, data)
+      return(predictions)
+    }
+    
+    # Generate predictions (maps) for each ensemble member using the trained models
+    maps <- list()
+    predictions <- list()
+    for (i in 1:length(cnn_output)) {
+      map_pred <- predict_with_model(cnn_output[[i]], as.data.frame(covariates[]), scaling_params[[i]])
+      map_pred <- terra::rast(matrix(map_pred, nrow = nrow(covariates), ncol = ncol(covariates)), ext = terra::ext(covariates), crs = terra::crs(covariates))
+      maps[[i]] <- map_pred
+      
+      # Generate predictions for testing data
+      predictions[[i]] <- predict_with_model(cnn_output[[i]], ensembles[[i]]$testing, scaling_params[[i]])
+    }
+    
+    # Calculate performance metrics for each ensemble member
+    metrics <- list()
+    for (i in 1:length(predictions)) {
+      actual <- ensembles[[i]]$testing[[paste0(carbon_pool, "_ens", i)]]
+      predicted <- predictions[[i]]
+      mse <- mean((actual - predicted)^2)
+      mae <- mean(abs(actual - predicted))
+      r_squared <- 1 - sum((actual - predicted)^2) / sum((actual - mean(actual))^2)
+      metrics[[i]] <- list(MSE = mse, MAE = mae, R_squared = r_squared, actual = actual, predicted = predicted)
+    }
+    
+    # Organize the results into a single output list
+    downscale_output <- list(data = ensembles, models = cnn_output, maps = maps, metrics = metrics, scaling_params = scaling_params)
+    
+    # Rename each element of the output list with appropriate ensemble numbers
+    for (i in 1:length(downscale_output$data)) {
+      names(downscale_output$data)[i] <- paste0("ensemble", i)
+      names(downscale_output$models)[i] <- paste0("ensemble", i)
+      names(downscale_output$maps)[i] <- paste0("ensemble", i)
+      names(downscale_output$metrics)[i] <- paste0("ensemble", i)
+      names(downscale_output$scaling_params)[i] <- paste0("ensemble", i)
+    }
+    
+  } else {
+    stop("Invalid model_type. Please choose either 'rf' for Random Forest or 'cnn' for Convolutional Neural Network.")
   }
   
   return(downscale_output)

From d02318fb968d563f632bef9923f4fd992cdba1dd Mon Sep 17 00:00:00 2001
From: sambhavnoobcoder <94298612+sambhavnoobcoder@users.noreply.github.com>
Date: Sun, 21 Jul 2024 22:41:00 +0530
Subject: [PATCH 037/155] Update SDA_downscale function documentation

This commit updates the documentation for the SDA_downscale function to include
the newly added 'model_type' parameter.

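- Added @param description for 'model_type'
- Updated @description to mention both Random Forest and CNN options
- Removed the default value ("rf") of 'model_type' from the function signature, so callers must now pass it explicitly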

These documentation changes ensure that users are fully informed about the new
model selection capability and how to use it effectively.
---
 modules/assim.sequential/R/downscale_function.R | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/modules/assim.sequential/R/downscale_function.R b/modules/assim.sequential/R/downscale_function.R
index 4af676527af..0cabda4a5dd 100644
--- a/modules/assim.sequential/R/downscale_function.R
+++ b/modules/assim.sequential/R/downscale_function.R
@@ -75,14 +75,15 @@ SDA_downscale_preprocess <- function(data_path, coords_path, date, carbon_pool)
 ##' @param date In quotes, if SDA site run, format is yyyy/mm/dd, if NEON, yyyy-mm-dd. Restricted to years within file supplied to 'data'.
 ##' @param carbon_pool In quotes, carbon pool of interest. Name must match carbon pool name found within file supplied to 'data'.
 ##' @param covariates SpatRaster stack, used as predictors in CNN. Layers within stack should be named. Recommended that this stack be generated using 'covariates' instructions in assim.sequential/inst folder
+##' @param model_type Character. Either "rf" for Random Forest or "cnn" for Convolutional Neural Network.
 ##' @details This function will downscale forecast data to unmodeled locations using covariates and site locations
 ##'
-##' @description This function uses the Convolutional Neural Network(CNN) model.
+##' @description This function uses either Random Forest or Convolutional Neural Network model based on the model_type parameter.
 ##'
 ##' @return It returns the `downscale_output` list containing lists for the training and testing data sets, models, and predicted maps for each ensemble member.
 
 
-SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_type = "rf") {
+SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_type ) {
   input_data <- preprocessed$input_data
   site_coordinates <- preprocessed$site_coordinates
   carbon_data <- preprocessed$carbon_data

From ac572da9cb508edb8ad1a63b1d12ff6ee3dcea17 Mon Sep 17 00:00:00 2001
From: sambhavnoobcoder <94298612+sambhavnoobcoder@users.noreply.github.com>
Date: Sun, 21 Jul 2024 23:53:16 +0530
Subject: [PATCH 038/155] Refactor SDA_downscale function to remove metrics
 calculation

- Removed the metrics calculation from within the SDA_downscale function
- Retained predictions for testing data in the output
- Simplified the structure of the output list
- Improved modularity by separating concerns

This change allows for more flexibility in post-processing the downscaling
results. Users can now calculate various metrics or perform additional
analyses on the output without needing to modify or re-run the main
downscaling function. This separation of concerns enhances the function's
reusability and makes it easier to maintain and extend in the future.
---
 modules/assim.sequential/R/downscale_function.R | 17 +++--------------
 1 file changed, 3 insertions(+), 14 deletions(-)

diff --git a/modules/assim.sequential/R/downscale_function.R b/modules/assim.sequential/R/downscale_function.R
index 0cabda4a5dd..98b8e092689 100644
--- a/modules/assim.sequential/R/downscale_function.R
+++ b/modules/assim.sequential/R/downscale_function.R
@@ -83,7 +83,7 @@ SDA_downscale_preprocess <- function(data_path, coords_path, date, carbon_pool)
 ##' @return It returns the `downscale_output` list containing lists for the training and testing data sets, models, and predicted maps for each ensemble member.
 
 
-SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_type ) {
+SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_type) {
   input_data <- preprocessed$input_data
   site_coordinates <- preprocessed$site_coordinates
   carbon_data <- preprocessed$carbon_data
@@ -236,26 +236,15 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ
       predictions[[i]] <- predict_with_model(cnn_output[[i]], ensembles[[i]]$testing, scaling_params[[i]])
     }
     
-    # Calculate performance metrics for each ensemble member
-    metrics <- list()
-    for (i in 1:length(predictions)) {
-      actual <- ensembles[[i]]$testing[[paste0(carbon_pool, "_ens", i)]]
-      predicted <- predictions[[i]]
-      mse <- mean((actual - predicted)^2)
-      mae <- mean(abs(actual - predicted))
-      r_squared <- 1 - sum((actual - predicted)^2) / sum((actual - mean(actual))^2)
-      metrics[[i]] <- list(MSE = mse, MAE = mae, R_squared = r_squared, actual = actual, predicted = predicted)
-    }
-    
     # Organize the results into a single output list
-    downscale_output <- list(data = ensembles, models = cnn_output, maps = maps, metrics = metrics, scaling_params = scaling_params)
+    downscale_output <- list(data = ensembles, models = cnn_output, maps = maps, predictions = predictions, scaling_params = scaling_params)
     
     # Rename each element of the output list with appropriate ensemble numbers
     for (i in 1:length(downscale_output$data)) {
       names(downscale_output$data)[i] <- paste0("ensemble", i)
       names(downscale_output$models)[i] <- paste0("ensemble", i)
       names(downscale_output$maps)[i] <- paste0("ensemble", i)
-      names(downscale_output$metrics)[i] <- paste0("ensemble", i)
+      names(downscale_output$predictions)[i] <- paste0("ensemble", i)
       names(downscale_output$scaling_params)[i] <- paste0("ensemble", i)
     }
     

From 350278f8d7c8bbfb6bfab7438ea9a50fc5ba215f Mon Sep 17 00:00:00 2001
From: sambhavnoobcoder <94298612+sambhavnoobcoder@users.noreply.github.com>
Date: Sun, 21 Jul 2024 23:57:21 +0530
Subject: [PATCH 039/155] Add calculate_metrics function for downscaling
 results

- Created new function calculate_metrics to compute performance metrics
- Calculates MSE, MAE, and R-squared for each ensemble
- Takes downscale_output and carbon_pool as inputs
- Returns a list of metrics for each ensemble

This new function allows for flexible post-processing of downscaling results.
Users can now easily compute performance metrics without modifying the main
SDA_downscale function. This separation of concerns improves modularity and
makes it easier to add or modify metrics in the future.

The calculate_metrics function can be called separately after downscaling,
enabling users to compute metrics only when needed and potentially save
computational resources.
---
 .../assim.sequential/R/downscale_function.R   | 20 +++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/modules/assim.sequential/R/downscale_function.R b/modules/assim.sequential/R/downscale_function.R
index 98b8e092689..b520d812582 100644
--- a/modules/assim.sequential/R/downscale_function.R
+++ b/modules/assim.sequential/R/downscale_function.R
@@ -254,3 +254,23 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ
   
   return(downscale_output)
 }
+
+# New separate function for calculating metrics
+calculate_metrics <- function(downscale_output, carbon_pool) {
+  metrics <- list()
+  
+  for (i in 1:length(downscale_output$data)) {
+    actual <- downscale_output$data[[i]]$testing[[paste0(carbon_pool, "_ens", i)]]
+    predicted <- downscale_output$predictions[[i]]
+    
+    mse <- mean((actual - predicted)^2)
+    mae <- mean(abs(actual - predicted))
+    r_squared <- 1 - sum((actual - predicted)^2) / sum((actual - mean(actual))^2)
+    
+    metrics[[i]] <- list(MSE = mse, MAE = mae, R_squared = r_squared, actual = actual, predicted = predicted)
+  }
+  
+  names(metrics) <- paste0("ensemble", seq_along(metrics))
+  
+  return(metrics)
+}

From 0c4fb828ffe81708a04e92a5c86c8fa7ed27b344 Mon Sep 17 00:00:00 2001
From: sambhavnoobcoder <94298612+sambhavnoobcoder@users.noreply.github.com>
Date: Mon, 22 Jul 2024 00:05:18 +0530
Subject: [PATCH 040/155] Add documentation comments to calculate_metrics
 function

- Added title, name, and author fields
- Included parameter descriptions for downscale_output and carbon_pool
- Added detailed description of function's purpose and operation
- Provided description of return value and its components
- Ensured consistency with documentation style of other functions
---
 modules/assim.sequential/R/downscale_function.R | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/modules/assim.sequential/R/downscale_function.R b/modules/assim.sequential/R/downscale_function.R
index b520d812582..96b3514dbba 100644
--- a/modules/assim.sequential/R/downscale_function.R
+++ b/modules/assim.sequential/R/downscale_function.R
@@ -255,7 +255,19 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ
   return(downscale_output)
 }
 
-# New separate function for calculating metrics
+##' @title Calculate Metrics for Downscaling Results
+##' @name calculate_metrics
+##' @author Sambhav Dixit
+##'
+##' @param downscale_output List. Output from the SDA_downscale function, containing data, models, maps, and predictions for each ensemble.
+##' @param carbon_pool Character. Name of the carbon pool used in the downscaling process.
+##'
+##' @details This function calculates performance metrics for the downscaling results. It computes Mean Squared Error (MSE), Mean Absolute Error (MAE), and R-squared for each ensemble. The function uses the actual values from the testing data and the predictions generated during the downscaling process.
+##'
+##' @description This function takes the output from the SDA_downscale function and computes various performance metrics for each ensemble. It provides a way to evaluate the accuracy of the downscaling results without modifying the main downscaling function.
+##'
+##' @return A list of metrics for each ensemble, where each element contains MAE , MSE ,R_squared ,actual values from testing data and predicted values for the testing data 
+
 calculate_metrics <- function(downscale_output, carbon_pool) {
   metrics <- list()
   

From 7574abc76dca44880b809d4a18ec9c1b87b540d6 Mon Sep 17 00:00:00 2001
From: sambhavnoobcoder <94298612+sambhavnoobcoder@users.noreply.github.com>
Date: Mon, 22 Jul 2024 01:06:41 +0530
Subject: [PATCH 041/155] Refactor SDA_downscale function for improved
 efficiency

This commit significantly refactors the SDA_downscale function to improve
efficiency and consistency. Key changes include:

1. Using terra::predict for both RF and CNN models, which is more efficient
   for large geospatial datasets.
2. Adding a custom cnn_predict function to handle preprocessing for CNN models.
3. Standardizing the output structure between RF and CNN models.
4. Improving efficiency in generating predictions, especially for large datasets.
5. Ensuring consistent naming of ensemble elements in the output for both
   model types.

These changes should result in better performance, especially for large
datasets, while maintaining the same function interface. The refactored
function is now more consistent across model types and should be easier
to maintain and extend in the future.
---
 .../assim.sequential/R/downscale_function.R   | 79 +++++++++++--------
 1 file changed, 46 insertions(+), 33 deletions(-)

diff --git a/modules/assim.sequential/R/downscale_function.R b/modules/assim.sequential/R/downscale_function.R
index 96b3514dbba..23703ebafff 100644
--- a/modules/assim.sequential/R/downscale_function.R
+++ b/modules/assim.sequential/R/downscale_function.R
@@ -83,6 +83,21 @@ SDA_downscale_preprocess <- function(data_path, coords_path, date, carbon_pool)
 ##' @return It returns the `downscale_output` list containing lists for the training and testing data sets, models, and predicted maps for each ensemble member.
 
 
+##' @title North America Downscale Function
+##' @name SDA_downscale
+##' @author Joshua Ploshay, Sambhav Dixit
+##'
+##' @param preprocessed List. Preprocessed data returned as an output from the SDA_downscale_preprocess function.
+##' @param date Character. If SDA site run, format is yyyy/mm/dd; if NEON, yyyy-mm-dd. Restricted to years within file supplied to 'data'.
+##' @param carbon_pool Character. Carbon pool of interest. Name must match carbon pool name found within file supplied to 'data'.
+##' @param covariates SpatRaster stack. Used as predictors in CNN. Layers within stack should be named.
+##' @param model_type Character. Either "rf" for Random Forest or "cnn" for Convolutional Neural Network.
+##' @details This function will downscale forecast data to unmodeled locations using covariates and site locations
+##'
+##' @description This function uses either Random Forest or Convolutional Neural Network model based on the model_type parameter.
+##'
+##' @return A list containing the training and testing data sets, models, predicted maps for each ensemble member, and predictions for testing data.
+
 SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_type) {
   input_data <- preprocessed$input_data
   site_coordinates <- preprocessed$site_coordinates
@@ -132,22 +147,15 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ
     }
     
     # Generate predictions (maps) for each ensemble member using the trained models
-    maps <- list(ncol(rf_output))
+    maps <- list()
+    predictions <- list()
     for (i in 1:length(rf_output)) {
-      maps[[i]] <- terra::predict(object = covariates,
-                                  model = rf_output[[i]], na.rm = T)
+      maps[[i]] <- terra::predict(covariates, model = rf_output[[i]], na.rm = TRUE)
+      predictions[[i]] <- predict(rf_output[[i]], ensembles[[i]]$testing)
     }
     
     # Organize the results into a single output list
-    downscale_output <- list(ensembles, rf_output, maps)
-    
-    # Rename each element of the output list with appropriate ensemble numbers
-    for (i in 1:length(downscale_output)) {
-      names(downscale_output[[i]]) <- paste0("ensemble", seq(1:length(downscale_output[[i]])))
-    }
-    
-    # Rename the main components of the output list
-    names(downscale_output) <- c("data", "models", "maps")
+    downscale_output <- list(data = ensembles, models = rf_output, maps = maps, predictions = predictions)
     
   } else if (model_type == "cnn") {
     # Rename the carbon_data column for each ensemble member
@@ -215,43 +223,48 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ
       cnn_output[[i]] <- model
     }
     
-    # Wrapper function to apply the trained model
-    predict_with_model <- function(model, data, scaling_params) {
-      data <- as.matrix(data[, c("tavg", "prec", "srad", "vapr")])
-      data <- scale(data, center = scaling_params$mean, scale = scaling_params$sd)
-      data <- array_reshape(data, c(nrow(data), 1, ncol(data)))
-      predictions <- predict(model, data)
-      return(predictions)
+    # Custom predict function for CNN
+    cnn_predict <- function(model, newdata, scaling_params) {
+      newdata <- scale(newdata, center = scaling_params$mean, scale = scaling_params$sd)
+      newdata <- array_reshape(newdata, c(nrow(newdata), 1, ncol(newdata)))
+      predictions <- predict(model, newdata)
+      return(as.vector(predictions))
     }
     
     # Generate predictions (maps) for each ensemble member using the trained models
     maps <- list()
     predictions <- list()
     for (i in 1:length(cnn_output)) {
-      map_pred <- predict_with_model(cnn_output[[i]], as.data.frame(covariates[]), scaling_params[[i]])
-      map_pred <- terra::rast(matrix(map_pred, nrow = nrow(covariates), ncol = ncol(covariates)), ext = terra::ext(covariates), crs = terra::crs(covariates))
-      maps[[i]] <- map_pred
+      # Create a SpatRaster with the same properties as covariates
+      prediction_rast <- terra::rast(covariates)
+      
+      # Use terra::predict to apply the CNN model
+      maps[[i]] <- terra::predict(prediction_rast, model = cnn_output[[i]], 
+                                  fun = cnn_predict, 
+                                  scaling_params = scaling_params[[i]])
       
       # Generate predictions for testing data
-      predictions[[i]] <- predict_with_model(cnn_output[[i]], ensembles[[i]]$testing, scaling_params[[i]])
+      test_data <- as.matrix(ensembles[[i]]$testing[, c("tavg", "prec", "srad", "vapr")])
+      predictions[[i]] <- cnn_predict(cnn_output[[i]], test_data, scaling_params[[i]])
     }
     
     # Organize the results into a single output list
     downscale_output <- list(data = ensembles, models = cnn_output, maps = maps, predictions = predictions, scaling_params = scaling_params)
-    
-    # Rename each element of the output list with appropriate ensemble numbers
-    for (i in 1:length(downscale_output$data)) {
-      names(downscale_output$data)[i] <- paste0("ensemble", i)
-      names(downscale_output$models)[i] <- paste0("ensemble", i)
-      names(downscale_output$maps)[i] <- paste0("ensemble", i)
-      names(downscale_output$predictions)[i] <- paste0("ensemble", i)
-      names(downscale_output$scaling_params)[i] <- paste0("ensemble", i)
-    }
-    
   } else {
     stop("Invalid model_type. Please choose either 'rf' for Random Forest or 'cnn' for Convolutional Neural Network.")
   }
   
+  # Rename each element of the output list with appropriate ensemble numbers
+  for (i in 1:length(downscale_output$data)) {
+    names(downscale_output$data)[i] <- paste0("ensemble", i)
+    names(downscale_output$models)[i] <- paste0("ensemble", i)
+    names(downscale_output$maps)[i] <- paste0("ensemble", i)
+    names(downscale_output$predictions)[i] <- paste0("ensemble", i)
+    if (model_type == "cnn") {
+      names(downscale_output$scaling_params)[i] <- paste0("ensemble", i)
+    }
+  }
+  
   return(downscale_output)
 }
 

From 6acfd74f2ffd5a21f1e841de0d27bfc0c33d3448 Mon Sep 17 00:00:00 2001
From: sambhavnoobcoder <94298612+sambhavnoobcoder@users.noreply.github.com>
Date: Mon, 22 Jul 2024 01:41:31 +0530
Subject: [PATCH 042/155] Optimize SDA_downscale function and improve covariate
 handling

- Implemented dynamic covariate detection using `names(predictors)`
- Created a single `full_data` frame for all predictors and ensemble data
- Performed single train-test split for all ensemble members
- Dynamically generate Random Forest formula using detected covariates
- Prepare CNN input data once for all ensemble members
- Calculate CNN scaling parameters once using all data
- Updated data structures in output to reflect optimized processing

These changes improve code efficiency and flexibility without
altering the core functionality of the SDA_downscale function.
---
 .../assim.sequential/R/downscale_function.R   | 141 ++++++++----------
 1 file changed, 62 insertions(+), 79 deletions(-)

diff --git a/modules/assim.sequential/R/downscale_function.R b/modules/assim.sequential/R/downscale_function.R
index 23703ebafff..564066285e5 100644
--- a/modules/assim.sequential/R/downscale_function.R
+++ b/modules/assim.sequential/R/downscale_function.R
@@ -109,96 +109,78 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ
   # Extract predictors from covariates raster using site coordinates
   predictors <- as.data.frame(terra::extract(covariates, site_coordinates, ID = FALSE))
   
-  # Combine each ensemble member with all predictors
-  ensembles <- list()
-  for (i in seq_along(carbon_data)) {
-    ensembles[[i]] <- cbind(carbon_data[[i]], predictors)
-  }
+  # Dynamically get covariate names
+  covariate_names <- names(predictors)
+  
+  # Create a single data frame with all predictors and ensemble data
+  full_data <- cbind(carbon_data, predictors)
+  
+  # Split the observations into training and testing sets
+  set.seed(123)  # for reproducibility
+  sample <- sample(1:nrow(full_data), size = round(0.75 * nrow(full_data)))
+  train_data <- full_data[sample, ]
+  test_data <- full_data[-sample, ]
   
   if (model_type == "rf") {
-    # Rename the carbon_data column for each ensemble member
-    for (i in 1:length(ensembles)) {
-      ensembles[[i]] <- dplyr::rename(ensembles[[i]], "carbon_data" = "carbon_data[[i]]")
-    }
-    
-    # Split the observations in each data frame into two data frames based on the proportion of 3/4
-    ensembles <- lapply(ensembles, function(df) {
-      sample <- sample(1:nrow(df), size = round(0.75*nrow(df)))
-      train  <- df[sample, ]
-      test   <- df[-sample, ]
-      split_list <- list(train, test)
-      return(split_list)
-    })
-    
-    # Rename the training and testing data frames for each ensemble member
-    for (i in 1:length(ensembles)) {
-      names(ensembles[[i]]) <- c("training", "testing")
-    }
-    
     # Train a random forest model for each ensemble member using the training data
     rf_output <- list()
-    for (i in 1:length(ensembles)) {
-      rf_output[[i]] <- randomForest::randomForest(ensembles[[i]][[1]][["carbon_data"]] ~ tavg+prec+srad+vapr,
-                                                   data = ensembles[[i]][[1]],
+    for (i in seq_along(carbon_data)) {
+      ensemble_col <- paste0("ensemble", i)
+      formula <- as.formula(paste(ensemble_col, "~", paste(covariate_names, collapse = " + ")))
+      rf_output[[i]] <- randomForest::randomForest(formula,
+                                                   data = train_data,
                                                    ntree = 1000,
                                                    na.action = stats::na.omit,
-                                                   keep.forest = T,
-                                                   importance = T)
+                                                   keep.forest = TRUE,
+                                                   importance = TRUE)
     }
     
     # Generate predictions (maps) for each ensemble member using the trained models
     maps <- list()
     predictions <- list()
-    for (i in 1:length(rf_output)) {
+    for (i in seq_along(rf_output)) {
       maps[[i]] <- terra::predict(covariates, model = rf_output[[i]], na.rm = TRUE)
-      predictions[[i]] <- predict(rf_output[[i]], ensembles[[i]]$testing)
+      predictions[[i]] <- predict(rf_output[[i]], test_data)
     }
     
     # Organize the results into a single output list
-    downscale_output <- list(data = ensembles, models = rf_output, maps = maps, predictions = predictions)
+    downscale_output <- list(
+      data = list(training = train_data, testing = test_data),
+      models = rf_output,
+      maps = maps,
+      predictions = predictions
+    )
     
   } else if (model_type == "cnn") {
-    # Rename the carbon_data column for each ensemble member
-    for (i in 1:length(ensembles)) {
-      colnames(ensembles[[i]])[1] <- paste0(carbon_pool, "_ens", i)
-    }
+    # Prepare data for CNN
+    x_data <- as.matrix(full_data[, covariate_names])
+    y_data <- as.matrix(carbon_data)
+    
+    # Calculate scaling parameters from all data
+    scaling_params <- list(
+      mean = colMeans(x_data),
+      sd = apply(x_data, 2, sd)
+    )
     
-    # Split the observations in each data frame into two data frames based on the proportion of 3/4
-    ensembles <- lapply(ensembles, function(df) {
-      sample <- sample(1:nrow(df), size = round(0.75 * nrow(df)))
-      train <- df[sample, ]
-      test <- df[-sample, ]
-      split_list <- list(training = train, testing = test)
-      return(split_list)
-    })
+    # Normalize the data
+    x_data_scaled <- scale(x_data, center = scaling_params$mean, scale = scaling_params$sd)
     
-    # Train a CNN model for each ensemble member using the training data
+    # Split into training and testing sets
+    x_train <- x_data_scaled[sample, ]
+    x_test <- x_data_scaled[-sample, ]
+    y_train <- y_data[sample, ]
+    y_test <- y_data[-sample, ]
+    
+    # Reshape data for CNN input (samples, timesteps, features)
+    x_train <- array_reshape(x_train, c(nrow(x_train), 1, ncol(x_train)))
+    x_test <- array_reshape(x_test, c(nrow(x_test), 1, ncol(x_test)))
+    
+    # Train a CNN model for each ensemble member
     cnn_output <- list()
-    scaling_params <- list()
-    for (i in 1:length(ensembles)) {
-      # Prepare data for CNN
-      x_train <- as.matrix(ensembles[[i]]$training[, c("tavg", "prec", "srad", "vapr")])
-      y_train <- as.matrix(ensembles[[i]]$training[[paste0(carbon_pool, "_ens", i)]])
-      x_test <- as.matrix(ensembles[[i]]$testing[, c("tavg", "prec", "srad", "vapr")])
-      y_test <- as.matrix(ensembles[[i]]$testing[[paste0(carbon_pool, "_ens", i)]])
-      
-      # Calculate scaling parameters from training data
-      scaling_params[[i]] <- list(
-        mean = colMeans(x_train),
-        sd = apply(x_train, 2, sd)
-      )
-      
-      # Normalize the data using training data parameters
-      x_train <- scale(x_train, center = scaling_params[[i]]$mean, scale = scaling_params[[i]]$sd)
-      x_test <- scale(x_test, center = scaling_params[[i]]$mean, scale = scaling_params[[i]]$sd)
-      
-      # Reshape data for CNN input (samples, timesteps, features)
-      x_train <- array_reshape(x_train, c(nrow(x_train), 1, ncol(x_train)))
-      x_test <- array_reshape(x_test, c(nrow(x_test), 1, ncol(x_test)))
-      
+    for (i in seq_along(carbon_data)) {
       # Define the CNN model
       model <- keras_model_sequential() |>
-        layer_conv_1d(filters = 64, kernel_size = 1, activation = 'relu', input_shape = c(1, 4)) |>
+        layer_conv_1d(filters = 64, kernel_size = 1, activation = 'relu', input_shape = c(1, length(covariate_names))) |>
         layer_flatten() |>
         layer_dense(units = 64, activation = 'relu') |>
         layer_dense(units = 1)
@@ -213,7 +195,7 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ
       # Train the model
       model |> fit(
         x = x_train,
-        y = y_train,
+        y = y_train[, i],
         epochs = 100,
         batch_size = 32,
         validation_split = 0.2,
@@ -234,35 +216,36 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ
     # Generate predictions (maps) for each ensemble member using the trained models
     maps <- list()
     predictions <- list()
-    for (i in 1:length(cnn_output)) {
+    for (i in seq_along(cnn_output)) {
       # Create a SpatRaster with the same properties as covariates
       prediction_rast <- terra::rast(covariates)
       
       # Use terra::predict to apply the CNN model
       maps[[i]] <- terra::predict(prediction_rast, model = cnn_output[[i]], 
                                   fun = cnn_predict, 
-                                  scaling_params = scaling_params[[i]])
+                                  scaling_params = scaling_params)
       
       # Generate predictions for testing data
-      test_data <- as.matrix(ensembles[[i]]$testing[, c("tavg", "prec", "srad", "vapr")])
-      predictions[[i]] <- cnn_predict(cnn_output[[i]], test_data, scaling_params[[i]])
+      predictions[[i]] <- cnn_predict(cnn_output[[i]], x_data[-sample, ], scaling_params)
     }
     
     # Organize the results into a single output list
-    downscale_output <- list(data = ensembles, models = cnn_output, maps = maps, predictions = predictions, scaling_params = scaling_params)
+    downscale_output <- list(
+      data = list(training = train_data, testing = test_data),
+      models = cnn_output,
+      maps = maps,
+      predictions = predictions,
+      scaling_params = scaling_params
+    )
   } else {
     stop("Invalid model_type. Please choose either 'rf' for Random Forest or 'cnn' for Convolutional Neural Network.")
   }
   
   # Rename each element of the output list with appropriate ensemble numbers
-  for (i in 1:length(downscale_output$data)) {
-    names(downscale_output$data)[i] <- paste0("ensemble", i)
+  for (i in seq_along(carbon_data)) {
     names(downscale_output$models)[i] <- paste0("ensemble", i)
     names(downscale_output$maps)[i] <- paste0("ensemble", i)
     names(downscale_output$predictions)[i] <- paste0("ensemble", i)
-    if (model_type == "cnn") {
-      names(downscale_output$scaling_params)[i] <- paste0("ensemble", i)
-    }
   }
   
   return(downscale_output)

From 5b6f5775b091336beccce964772efbe5feade470 Mon Sep 17 00:00:00 2001
From: sambhavnoobcoder <94298612+sambhavnoobcoder@users.noreply.github.com>
Date: Mon, 22 Jul 2024 02:21:08 +0530
Subject: [PATCH 043/155] Create SDA_downscale.Rd

Created and committed the new SDA_downscale.Rd to account for the changes made to the code so far.
---
 modules/assim.sequential/man/SDA_downscale.Rd | 39 +++++++++++++++++++
 1 file changed, 39 insertions(+)
 create mode 100644 modules/assim.sequential/man/SDA_downscale.Rd

diff --git a/modules/assim.sequential/man/SDA_downscale.Rd b/modules/assim.sequential/man/SDA_downscale.Rd
new file mode 100644
index 00000000000..79e05cf9386
--- /dev/null
+++ b/modules/assim.sequential/man/SDA_downscale.Rd
@@ -0,0 +1,39 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/downscale_function.R
+\name{SDA_downscale}
+\alias{SDA_downscale}
+\title{North America Downscale Function}
+\usage{
+SDA_downscale(preprocessed, date, carbon_pool, covariates, model_type)
+}
+\arguments{
+\item{preprocessed}{List. Preprocessed data returned as an output from the SDA_downscale_preprocess function.}
+
+\item{date}{Character. If SDA site run, format is yyyy/mm/dd; if NEON, yyyy-mm-dd. Restricted to years within file supplied to 'data'.}
+
+\item{carbon_pool}{Character. Carbon pool of interest. Name must match carbon pool name found within file supplied to 'data'.}
+
+\item{covariates}{SpatRaster stack. Used as predictors in CNN. Layers within stack should be named.}
+
+\item{model_type}{Character. Either "rf" for Random Forest or "cnn" for Convolutional Neural Network.}
+}
+\value{
+It returns the `downscale_output` list containing lists for the training and testing data sets, models, and predicted maps for each ensemble member.
+
+A list containing the training and testing data sets, models, predicted maps for each ensemble member, and predictions for testing data.
+}
+\description{
+This function uses either Random Forest or Convolutional Neural Network model based on the model_type parameter.
+
+This function uses either Random Forest or Convolutional Neural Network model based on the model_type parameter.
+}
+\details{
+This function will downscale forecast data to unmodeled locations using covariates and site locations
+
+This function will downscale forecast data to unmodeled locations using covariates and site locations
+}
+\author{
+Joshua Ploshay , Sambhav Dixit
+
+Joshua Ploshay, Sambhav Dixit
+}

From 50ee452db6805dbc08391a436f3bfa649206a177 Mon Sep 17 00:00:00 2001
From: sambhavnoobcoder <94298612+sambhavnoobcoder@users.noreply.github.com>
Date: Mon, 22 Jul 2024 02:25:56 +0530
Subject: [PATCH 044/155] Create SDA_downscale_preprocess.Rd

Created and committed the new SDA_downscale_preprocess.Rd to account for the changes made to the code so far.
---
 .../man/SDA_downscale_preprocess.Rd           | 29 +++++++++++++++++++
 1 file changed, 29 insertions(+)
 create mode 100644 modules/assim.sequential/man/SDA_downscale_preprocess.Rd

diff --git a/modules/assim.sequential/man/SDA_downscale_preprocess.Rd b/modules/assim.sequential/man/SDA_downscale_preprocess.Rd
new file mode 100644
index 00000000000..0bed11f4ba9
--- /dev/null
+++ b/modules/assim.sequential/man/SDA_downscale_preprocess.Rd
@@ -0,0 +1,29 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/downscale_function.R
+\name{SDA_downscale_preprocess}
+\alias{SDA_downscale_preprocess}
+\title{Preprocess Data for Downscaling}
+\usage{
+SDA_downscale_preprocess(data_path, coords_path, date, carbon_pool)
+}
+\arguments{
+\item{data_path}{Character. File path for .rds containing ensemble data.}
+
+\item{coords_path}{Character. File path for .csv file containing the site coordinates, with columns named "lon" and "lat".}
+
+\item{date}{Character. If SDA site run, format is yyyy/mm/dd; if NEON, yyyy-mm-dd. Restricted to years within the file supplied to 'data_path'.}
+
+\item{carbon_pool}{Character. Carbon pool of interest. Name must match the carbon pool name found within the file supplied to 'data_path'.}
+}
+\value{
+A list containing The read .rds data , The cleaned site coordinates, and the preprocessed carbon data.
+}
+\description{
+This function reads and checks the input data, ensuring that the required date and carbon pool exist, and that the site coordinates are valid.
+}
+\details{
+This function ensures that the specified date and carbon pool are present in the input data. It also checks the validity of the site coordinates and aligns the number of rows between site coordinates and carbon data.
+}
+\author{
+Sambhav Dixit
+}

From f812daa065987e461e928294785fc781ef6184b9 Mon Sep 17 00:00:00 2001
From: sambhavnoobcoder <94298612+sambhavnoobcoder@users.noreply.github.com>
Date: Mon, 22 Jul 2024 02:28:48 +0530
Subject: [PATCH 045/155] Create calculate_metrics.Rd

Created and committed calculate_metrics.Rd to account for the changes made when moving the evaluation out of the downscale function.
---
 .../assim.sequential/man/calculate_metrics.Rd | 25 +++++++++++++++++++
 1 file changed, 25 insertions(+)
 create mode 100644 modules/assim.sequential/man/calculate_metrics.Rd

diff --git a/modules/assim.sequential/man/calculate_metrics.Rd b/modules/assim.sequential/man/calculate_metrics.Rd
new file mode 100644
index 00000000000..f603c84707f
--- /dev/null
+++ b/modules/assim.sequential/man/calculate_metrics.Rd
@@ -0,0 +1,25 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/downscale_function.R
+\name{calculate_metrics}
+\alias{calculate_metrics}
+\title{Calculate Metrics for Downscaling Results}
+\usage{
+calculate_metrics(downscale_output, carbon_pool)
+}
+\arguments{
+\item{downscale_output}{List. Output from the SDA_downscale function, containing data, models, maps, and predictions for each ensemble.}
+
+\item{carbon_pool}{Character. Name of the carbon pool used in the downscaling process.}
+}
+\value{
+A list of metrics for each ensemble, where each element contains MAE , MSE ,R_squared ,actual values from testing data and predicted values for the testing data
+}
+\description{
+This function takes the output from the SDA_downscale function and computes various performance metrics for each ensemble. It provides a way to evaluate the accuracy of the downscaling results without modifying the main downscaling function.
+}
+\details{
+This function calculates performance metrics for the downscaling results. It computes Mean Squared Error (MSE), Mean Absolute Error (MAE), and R-squared for each ensemble. The function uses the actual values from the testing data and the predictions generated during the downscaling process.
+}
+\author{
+Sambhav Dixit
+}

From f55c2de81b3523c6bbe86382087cc87be675188e Mon Sep 17 00:00:00 2001
From: Sambhav Dixit <94298612+sambhavnoobcoder@users.noreply.github.com>
Date: Wed, 24 Jul 2024 02:33:45 +0530
Subject: [PATCH 046/155] Delete NA_downscale.Rd

Adding this commit to delete NA_downscale.Rd from my branch. This largely brings the roxygen documentation up to date with the present version of the code.
---
 modules/assim.sequential/man/NA_downscale.Rd | 29 --------------------
 1 file changed, 29 deletions(-)
 delete mode 100644 modules/assim.sequential/man/NA_downscale.Rd

diff --git a/modules/assim.sequential/man/NA_downscale.Rd b/modules/assim.sequential/man/NA_downscale.Rd
deleted file mode 100644
index 6cc3eb09eae..00000000000
--- a/modules/assim.sequential/man/NA_downscale.Rd
+++ /dev/null
@@ -1,29 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/downscale_function.R
-\name{NA_downscale}
-\alias{NA_downscale}
-\title{North America Downscale Function}
-\usage{
-NA_downscale(preprocessed, date, C_pool, covariates_path)
-}
-\arguments{
-\item{preprocessed}{, In quotes, prepocessed data returned as an output for passing the raw data to the NA_preprocess function.}
-
-\item{date}{In quotes, if SDA site run, format is yyyy/mm/dd, if NEON, yyyy-mm-dd. Restricted to years within file supplied to 'data'.}
-
-\item{C_pool}{In quotes, carbon pool of interest. Name must match carbon pool name found within file supplied to 'data'.}
-
-\item{covariates_path}{SpatRaster stack, used as predictors in randomForest. Layers within stack should be named. Recommended that this stack be generated using 'covariates' instructions in assim.sequential/inst folder}
-}
-\value{
-It returns the `downscale_output` list containing lists for the training and testing data sets, models, and predicted maps for each ensemble member.
-}
-\description{
-This function uses the Convolutional Neural Network(CNN) model.
-}
-\details{
-This function will downscale forecast data to unmodeled locations using covariates and site locations
-}
-\author{
-Joshua Ploshay , Sambhav Dixit
-}

From d751ffc60fad5c5dbf9030c0e9b938a65096dc36 Mon Sep 17 00:00:00 2001
From: Sambhav Dixit <94298612+sambhavnoobcoder@users.noreply.github.com>
Date: Wed, 24 Jul 2024 02:35:43 +0530
Subject: [PATCH 047/155] Delete NA_preprocess.Rd

Much like its downscale companion, the preprocess function has also been replaced with its SDA counterpart in the roxygen documentation, hence this file is removed.
---
 modules/assim.sequential/man/NA_preprocess.Rd | 30 -------------------
 1 file changed, 30 deletions(-)
 delete mode 100644 modules/assim.sequential/man/NA_preprocess.Rd

diff --git a/modules/assim.sequential/man/NA_preprocess.Rd b/modules/assim.sequential/man/NA_preprocess.Rd
deleted file mode 100644
index 1436581468e..00000000000
--- a/modules/assim.sequential/man/NA_preprocess.Rd
+++ /dev/null
@@ -1,30 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/downscale_function.R
-\name{NA_preprocess}
-\alias{NA_preprocess}
-\alias{preprocess}
-\title{Preprocess Data for Downscaling}
-\usage{
-preprocess(data_path, coords_path, date, C_pool)
-}
-\arguments{
-\item{data_path}{Character. File path for .rds containing ensemble data.}
-
-\item{coords_path}{Character. File path for .csv file containing the site coordinates, with columns named "lon" and "lat".}
-
-\item{date}{Character. If SDA site run, format is yyyy/mm/dd; if NEON, yyyy-mm-dd. Restricted to years within the file supplied to 'data_path'.}
-
-\item{C_pool}{Character. Carbon pool of interest. Name must match the carbon pool name found within the file supplied to 'data_path'.}
-}
-\value{
-A list containing The read .rds data , The cleaned site coordinates ,The extracted and possibly truncated carbon data.
-}
-\description{
-This function reads and checks the input data, ensuring that the required date and carbon pool exist, and that the site coordinates are valid.
-}
-\details{
-This function ensures that the specified date and carbon pool are present in the input data. It also checks the validity of the site coordinates and aligns the number of rows between site coordinates and carbon data.
-}
-\author{
-Sambhav Dixit
-}

From 06bf26bf85e342b6b9189d8b924d5ec19ed1ddb0 Mon Sep 17 00:00:00 2001
From: Sambhav Dixit <94298612+sambhavnoobcoder@users.noreply.github.com>
Date: Wed, 24 Jul 2024 02:45:57 +0530
Subject: [PATCH 048/155] Renamed function from calculate_metrics to
 SDA_downscale_metrics

In accordance with suggestions, and given the potential conflict with the metrics function in the benchmarking model as well as this function's specific application to downscaling, it seems apt to rename it to SDA_downscale_metrics (since the previous two functions are also SDA-reliant).
---
 modules/assim.sequential/R/downscale_function.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/modules/assim.sequential/R/downscale_function.R b/modules/assim.sequential/R/downscale_function.R
index 564066285e5..0339c3137a4 100644
--- a/modules/assim.sequential/R/downscale_function.R
+++ b/modules/assim.sequential/R/downscale_function.R
@@ -252,7 +252,7 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ
 }
 
 ##' @title Calculate Metrics for Downscaling Results
-##' @name calculate_metrics
+##' @name SDA_downscale_metrics
 ##' @author Sambhav Dixit
 ##'
 ##' @param downscale_output List. Output from the SDA_downscale function, containing data, models, maps, and predictions for each ensemble.
@@ -264,7 +264,7 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ
 ##'
 ##' @return A list of metrics for each ensemble, where each element contains MAE , MSE ,R_squared ,actual values from testing data and predicted values for the testing data 
 
-calculate_metrics <- function(downscale_output, carbon_pool) {
+SDA_downscale_metrics <- function(downscale_output, carbon_pool) {
   metrics <- list()
   
   for (i in 1:length(downscale_output$data)) {

From bb6614200471192b697c84a1ed87be8720272f9b Mon Sep 17 00:00:00 2001
From: Sambhav Dixit <94298612+sambhavnoobcoder@users.noreply.github.com>
Date: Wed, 24 Jul 2024 03:02:38 +0530
Subject: [PATCH 049/155] Refactor SDA_downscale function data prep snippet for
 improved efficiency

- Move common data preparation tasks before model-specific code
- Implement data scaling and splitting for both RF and CNN models
- Reduce code duplication by preparing data once for both model types
- Improve code readability and maintainability
---
 .../assim.sequential/R/downscale_function.R   | 58 +++++++++----------
 1 file changed, 29 insertions(+), 29 deletions(-)

diff --git a/modules/assim.sequential/R/downscale_function.R b/modules/assim.sequential/R/downscale_function.R
index 0339c3137a4..8ca5d79ed1e 100644
--- a/modules/assim.sequential/R/downscale_function.R
+++ b/modules/assim.sequential/R/downscale_function.R
@@ -102,25 +102,44 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ
   input_data <- preprocessed$input_data
   site_coordinates <- preprocessed$site_coordinates
   carbon_data <- preprocessed$carbon_data
-  
+
   # Convert site coordinates to SpatVector
   site_coordinates <- terra::vect(site_coordinates, geom = c("lon", "lat"), crs = "EPSG:4326")
-  
+
   # Extract predictors from covariates raster using site coordinates
   predictors <- as.data.frame(terra::extract(covariates, site_coordinates, ID = FALSE))
-  
+
   # Dynamically get covariate names
   covariate_names <- names(predictors)
-  
+
   # Create a single data frame with all predictors and ensemble data
   full_data <- cbind(carbon_data, predictors)
-  
+
   # Split the observations into training and testing sets
   set.seed(123)  # for reproducibility
   sample <- sample(1:nrow(full_data), size = round(0.75 * nrow(full_data)))
   train_data <- full_data[sample, ]
   test_data <- full_data[-sample, ]
-  
+
+  # Prepare data for both RF and CNN
+  x_data <- as.matrix(full_data[, covariate_names])
+  y_data <- as.matrix(carbon_data)
+
+  # Calculate scaling parameters from all data
+  scaling_params <- list(
+    mean = colMeans(x_data),
+    sd = apply(x_data, 2, sd)
+  )
+
+  # Normalize the data
+  x_data_scaled <- scale(x_data, center = scaling_params$mean, scale = scaling_params$sd)
+
+  # Split into training and testing sets
+  x_train <- x_data_scaled[sample, ]
+  x_test <- x_data_scaled[-sample, ]
+  y_train <- y_data[sample, ]
+  y_test <- y_data[-sample, ]
+
   if (model_type == "rf") {
     # Train a random forest model for each ensemble member using the training data
     rf_output <- list()
@@ -152,25 +171,6 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ
     )
     
   } else if (model_type == "cnn") {
-    # Prepare data for CNN
-    x_data <- as.matrix(full_data[, covariate_names])
-    y_data <- as.matrix(carbon_data)
-    
-    # Calculate scaling parameters from all data
-    scaling_params <- list(
-      mean = colMeans(x_data),
-      sd = apply(x_data, 2, sd)
-    )
-    
-    # Normalize the data
-    x_data_scaled <- scale(x_data, center = scaling_params$mean, scale = scaling_params$sd)
-    
-    # Split into training and testing sets
-    x_train <- x_data_scaled[sample, ]
-    x_test <- x_data_scaled[-sample, ]
-    y_train <- y_data[sample, ]
-    y_test <- y_data[-sample, ]
-    
     # Reshape data for CNN input (samples, timesteps, features)
     x_train <- array_reshape(x_train, c(nrow(x_train), 1, ncol(x_train)))
     x_test <- array_reshape(x_test, c(nrow(x_test), 1, ncol(x_test)))
@@ -221,8 +221,8 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ
       prediction_rast <- terra::rast(covariates)
       
       # Use terra::predict to apply the CNN model
-      maps[[i]] <- terra::predict(prediction_rast, model = cnn_output[[i]], 
-                                  fun = cnn_predict, 
+      maps[[i]] <- terra::predict(prediction_rast, model = cnn_output[[i]],
+                                  fun = cnn_predict,
                                   scaling_params = scaling_params)
       
       # Generate predictions for testing data
@@ -240,14 +240,14 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ
   } else {
     stop("Invalid model_type. Please choose either 'rf' for Random Forest or 'cnn' for Convolutional Neural Network.")
   }
-  
+
   # Rename each element of the output list with appropriate ensemble numbers
   for (i in seq_along(carbon_data)) {
     names(downscale_output$models)[i] <- paste0("ensemble", i)
     names(downscale_output$maps)[i] <- paste0("ensemble", i)
     names(downscale_output$predictions)[i] <- paste0("ensemble", i)
   }
-  
+
   return(downscale_output)
 }
 

From 4d2c6a54db51315b872c9d3ba2ce1b527e459df4 Mon Sep 17 00:00:00 2001
From: Sambhav Dixit <94298612+sambhavnoobcoder@users.noreply.github.com>
Date: Wed, 24 Jul 2024 03:24:55 +0530
Subject: [PATCH 050/155] Update SDA_downscale function to make seed optional

- Add optional 'seed' parameter to SDA_downscale function
- Set seed only if explicitly provided by user
- Default seed to NULL for flexibility in random number generation
- Improve reproducibility options while avoiding unintended consequences
- Address potential issues with hard-coded seed value
---
 .../assim.sequential/R/downscale_function.R   | 30 ++++++++++---------
 1 file changed, 16 insertions(+), 14 deletions(-)

diff --git a/modules/assim.sequential/R/downscale_function.R b/modules/assim.sequential/R/downscale_function.R
index 8ca5d79ed1e..bf27d7208f4 100644
--- a/modules/assim.sequential/R/downscale_function.R
+++ b/modules/assim.sequential/R/downscale_function.R
@@ -98,48 +98,50 @@ SDA_downscale_preprocess <- function(data_path, coords_path, date, carbon_pool)
 ##'
 ##' @return A list containing the training and testing data sets, models, predicted maps for each ensemble member, and predictions for testing data.
 
-SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_type) {
+SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_type, seed = NULL) {
   input_data <- preprocessed$input_data
   site_coordinates <- preprocessed$site_coordinates
   carbon_data <- preprocessed$carbon_data
-
+  
   # Convert site coordinates to SpatVector
   site_coordinates <- terra::vect(site_coordinates, geom = c("lon", "lat"), crs = "EPSG:4326")
-
+  
   # Extract predictors from covariates raster using site coordinates
   predictors <- as.data.frame(terra::extract(covariates, site_coordinates, ID = FALSE))
-
+  
   # Dynamically get covariate names
   covariate_names <- names(predictors)
-
+  
   # Create a single data frame with all predictors and ensemble data
   full_data <- cbind(carbon_data, predictors)
-
+  
   # Split the observations into training and testing sets
-  set.seed(123)  # for reproducibility
+  if (!is.null(seed)) {
+    set.seed(seed)  # Only set seed if provided
+  }
   sample <- sample(1:nrow(full_data), size = round(0.75 * nrow(full_data)))
   train_data <- full_data[sample, ]
   test_data <- full_data[-sample, ]
-
+  
   # Prepare data for both RF and CNN
   x_data <- as.matrix(full_data[, covariate_names])
   y_data <- as.matrix(carbon_data)
-
+  
   # Calculate scaling parameters from all data
   scaling_params <- list(
     mean = colMeans(x_data),
     sd = apply(x_data, 2, sd)
   )
-
+  
   # Normalize the data
   x_data_scaled <- scale(x_data, center = scaling_params$mean, scale = scaling_params$sd)
-
+  
   # Split into training and testing sets
   x_train <- x_data_scaled[sample, ]
   x_test <- x_data_scaled[-sample, ]
   y_train <- y_data[sample, ]
   y_test <- y_data[-sample, ]
-
+  
   if (model_type == "rf") {
     # Train a random forest model for each ensemble member using the training data
     rf_output <- list()
@@ -240,14 +242,14 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ
   } else {
     stop("Invalid model_type. Please choose either 'rf' for Random Forest or 'cnn' for Convolutional Neural Network.")
   }
-
+  
   # Rename each element of the output list with appropriate ensemble numbers
   for (i in seq_along(carbon_data)) {
     names(downscale_output$models)[i] <- paste0("ensemble", i)
     names(downscale_output$maps)[i] <- paste0("ensemble", i)
     names(downscale_output$predictions)[i] <- paste0("ensemble", i)
   }
-
+  
   return(downscale_output)
 }
 

From 7e97841635d1c38593cc125e0952720d78e6b5ce Mon Sep 17 00:00:00 2001
From: Sambhav Dixit <94298612+sambhavnoobcoder@users.noreply.github.com>
Date: Wed, 24 Jul 2024 03:32:10 +0530
Subject: [PATCH 051/155] Update SDA_downscale function documentation to
 improve seeding methods in the function

- Add @param for new 'seed' parameter in function comment
- Describe seed as optional with NULL default
- Improve function documentation for better usability
---
 modules/assim.sequential/R/downscale_function.R | 1 +
 1 file changed, 1 insertion(+)

diff --git a/modules/assim.sequential/R/downscale_function.R b/modules/assim.sequential/R/downscale_function.R
index bf27d7208f4..856b021dc8a 100644
--- a/modules/assim.sequential/R/downscale_function.R
+++ b/modules/assim.sequential/R/downscale_function.R
@@ -92,6 +92,7 @@ SDA_downscale_preprocess <- function(data_path, coords_path, date, carbon_pool)
 ##' @param carbon_pool Character. Carbon pool of interest. Name must match carbon pool name found within file supplied to 'data'.
 ##' @param covariates SpatRaster stack. Used as predictors in CNN. Layers within stack should be named.
 ##' @param model_type Character. Either "rf" for Random Forest or "cnn" for Convolutional Neural Network.
+##' @param seed Numeric or NULL. Optional seed for random number generation. Default is NULL.
 ##' @details This function will downscale forecast data to unmodeled locations using covariates and site locations
 ##'
 ##' @description This function uses either Random Forest or Convolutional Neural Network model based on the model_type parameter.

From fe5699d995f999fb05f71735b2db0e9da2c25f97 Mon Sep 17 00:00:00 2001
From: Sambhav Dixit <94298612+sambhavnoobcoder@users.noreply.github.com>
Date: Wed, 24 Jul 2024 03:41:03 +0530
Subject: [PATCH 052/155] set default model type

- Set a default value for the model_type argument.
- Chose random forest as the default model_type because of its lower inference time and lighter hardware requirements compared to the CNN.
- Users can still switch the model type by passing the model they want via the model_type argument.
---
 modules/assim.sequential/R/downscale_function.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/assim.sequential/R/downscale_function.R b/modules/assim.sequential/R/downscale_function.R
index 856b021dc8a..3cf8c8dd05a 100644
--- a/modules/assim.sequential/R/downscale_function.R
+++ b/modules/assim.sequential/R/downscale_function.R
@@ -99,7 +99,7 @@ SDA_downscale_preprocess <- function(data_path, coords_path, date, carbon_pool)
 ##'
 ##' @return A list containing the training and testing data sets, models, predicted maps for each ensemble member, and predictions for testing data.
 
-SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_type, seed = NULL) {
+SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_type = "rf", seed = NULL) {
   input_data <- preprocessed$input_data
   site_coordinates <- preprocessed$site_coordinates
   carbon_data <- preprocessed$carbon_data

From a20389f2e706ddcb6b5cd640abbcf8557a78c742 Mon Sep 17 00:00:00 2001
From: Sambhav Dixit <94298612+sambhavnoobcoder@users.noreply.github.com>
Date: Wed, 24 Jul 2024 03:43:25 +0530
Subject: [PATCH 053/155] Updated documentation for Default argument

- Updated the existing documentation to mention the default model choice.
- Specified that the default model choice is the random forest model.
---
 modules/assim.sequential/R/downscale_function.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/assim.sequential/R/downscale_function.R b/modules/assim.sequential/R/downscale_function.R
index 3cf8c8dd05a..6fe528fbf2a 100644
--- a/modules/assim.sequential/R/downscale_function.R
+++ b/modules/assim.sequential/R/downscale_function.R
@@ -91,7 +91,7 @@ SDA_downscale_preprocess <- function(data_path, coords_path, date, carbon_pool)
 ##' @param date Character. If SDA site run, format is yyyy/mm/dd; if NEON, yyyy-mm-dd. Restricted to years within file supplied to 'data'.
 ##' @param carbon_pool Character. Carbon pool of interest. Name must match carbon pool name found within file supplied to 'data'.
 ##' @param covariates SpatRaster stack. Used as predictors in CNN. Layers within stack should be named.
-##' @param model_type Character. Either "rf" for Random Forest or "cnn" for Convolutional Neural Network.
+##' @param model_type Character. Either "rf" for Random Forest or "cnn" for Convolutional Neural Network. Default is Random Forest.
 ##' @param seed Numeric or NULL. Optional seed for random number generation. Default is NULL.
 ##' @details This function will downscale forecast data to unmodeled locations using covariates and site locations
 ##'

From 1dd9e6cdfb60b3fae0852a08970d0237af48da70 Mon Sep 17 00:00:00 2001
From: Sambhav Dixit <94298612+sambhavnoobcoder@users.noreply.github.com>
Date: Wed, 24 Jul 2024 03:49:43 +0530
Subject: [PATCH 054/155] Removed extra roxygen block

It seems I ended up with a duplicate roxygen block, so I removed it.
---
 modules/assim.sequential/R/downscale_function.R | 16 ----------------
 1 file changed, 16 deletions(-)

diff --git a/modules/assim.sequential/R/downscale_function.R b/modules/assim.sequential/R/downscale_function.R
index 6fe528fbf2a..e402c575b38 100644
--- a/modules/assim.sequential/R/downscale_function.R
+++ b/modules/assim.sequential/R/downscale_function.R
@@ -67,22 +67,6 @@ SDA_downscale_preprocess <- function(data_path, coords_path, date, carbon_pool)
   return(list(input_data = input_data, site_coordinates = site_coordinates, carbon_data = carbon_data))
 }
 
-##' @title North America Downscale Function
-##' @name SDA_downscale
-##' @author Joshua Ploshay , Sambhav Dixit
-##'
-##' @param preprocessed , In quotes, prepocessed data returned as an output for passing the raw data to the NA_preprocess function.
-##' @param date In quotes, if SDA site run, format is yyyy/mm/dd, if NEON, yyyy-mm-dd. Restricted to years within file supplied to 'data'.
-##' @param carbon_pool In quotes, carbon pool of interest. Name must match carbon pool name found within file supplied to 'data'.
-##' @param covariates SpatRaster stack, used as predictors in CNN. Layers within stack should be named. Recommended that this stack be generated using 'covariates' instructions in assim.sequential/inst folder
-##' @param model_type Character. Either "rf" for Random Forest or "cnn" for Convolutional Neural Network.
-##' @details This function will downscale forecast data to unmodeled locations using covariates and site locations
-##'
-##' @description This function uses either Random Forest or Convolutional Neural Network model based on the model_type parameter.
-##'
-##' @return It returns the `downscale_output` list containing lists for the training and testing data sets, models, and predicted maps for each ensemble member.
-
-
 ##' @title North America Downscale Function
 ##' @name SDA_downscale
 ##' @author Joshua Ploshay, Sambhav Dixit

From 35f0b3e780bab34a66392c992f5c507a90e703a2 Mon Sep 17 00:00:00 2001
From: Sambhav Dixit <94298612+sambhavnoobcoder@users.noreply.github.com>
Date: Wed, 24 Jul 2024 03:58:56 +0530
Subject: [PATCH 055/155] modified title of SDA_downscale function

- Modified the existing title of the downscale function.
- "North America Downscale Function" was not an apt title for the function, as discussed in a previous conversation.
- The function is named SDA_downscale, so it is only fitting to use "SDA" instead of "North America" in the title.
---
 modules/assim.sequential/R/downscale_function.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/assim.sequential/R/downscale_function.R b/modules/assim.sequential/R/downscale_function.R
index e402c575b38..3ff6793767e 100644
--- a/modules/assim.sequential/R/downscale_function.R
+++ b/modules/assim.sequential/R/downscale_function.R
@@ -67,7 +67,7 @@ SDA_downscale_preprocess <- function(data_path, coords_path, date, carbon_pool)
   return(list(input_data = input_data, site_coordinates = site_coordinates, carbon_data = carbon_data))
 }
 
-##' @title North America Downscale Function
+##' @title SDA Downscale Function
 ##' @name SDA_downscale
 ##' @author Joshua Ploshay, Sambhav Dixit
 ##'

From 91236acea5f1b528d8579c0e6152564aedcac5cc Mon Sep 17 00:00:00 2001
From: Sambhav Dixit <94298612+sambhavnoobcoder@users.noreply.github.com>
Date: Wed, 24 Jul 2024 04:50:57 +0530
Subject: [PATCH 056/155] Keeping date as a Date type

- Treating the date as a character was unnecessary, as discussed previously.
- Removed the repeated conversions of the date into a character.
---
 .../assim.sequential/R/downscale_function.R    | 18 +++---------------
 1 file changed, 3 insertions(+), 15 deletions(-)

diff --git a/modules/assim.sequential/R/downscale_function.R b/modules/assim.sequential/R/downscale_function.R
index 3ff6793767e..8b929877e6d 100644
--- a/modules/assim.sequential/R/downscale_function.R
+++ b/modules/assim.sequential/R/downscale_function.R
@@ -14,31 +14,21 @@
 
 # Preprocess function to check and clean the data
 SDA_downscale_preprocess <- function(data_path, coords_path, date, carbon_pool) {
-  # Read the input data and site coordinates
   input_data <- readRDS(data_path)
   site_coordinates <- readr::read_csv(coords_path)
   
-  # Convert input_data names to standard date format
-  input_date_names <- suppressWarnings(as.character(lubridate::ymd(names(input_data))))
-  names(input_data) <- ifelse(is.na(input_date_names), 
-                              names(input_data),
-                              input_date_names)
+  input_date_names <- lubridate::ymd(names(input_data))
+  names(input_data) <- input_date_names
   
-  # Convert the input date to standard format
-  standard_date <- as.character(lubridate::ymd(date))
+  standard_date <- lubridate::ymd(date)
   
-  # Ensure the date exists in the input data
   if (!standard_date %in% names(input_data)) {
     stop(paste("Date", date, "not found in the input data."))
   }
   
-  # Extract the carbon data for the specified focus year
   index <- which(names(input_data) == standard_date)
   data <- input_data[[index]]
   
-  # Rest of the function remains the same...
-  
-  # Ensure the carbon pool exists in the input data
   if (!carbon_pool %in% names(data)) {
     stop(paste("Carbon pool", carbon_pool, "not found in the input data."))
   }
@@ -46,12 +36,10 @@ SDA_downscale_preprocess <- function(data_path, coords_path, date, carbon_pool)
   carbon_data <- as.data.frame(t(data[which(names(data) == carbon_pool)]))
   names(carbon_data) <- paste0("ensemble", seq(ncol(carbon_data)))
   
-  # Ensure site coordinates have 'lon' and 'lat' columns
   if (!all(c("lon", "lat") %in% names(site_coordinates))) {
     stop("Site coordinates must contain 'lon' and 'lat' columns.")
   }
   
-  # Ensure the number of rows in site coordinates matches the number of rows in carbon data
   if (nrow(site_coordinates) != nrow(carbon_data)) {
     message("Number of rows in site coordinates does not match the number of rows in carbon data.")
     if (nrow(site_coordinates) > nrow(carbon_data)) {

From d01f739583c9cc6ecf4ec377006dd10c15020b9e Mon Sep 17 00:00:00 2001
From: Sambhav Dixit <94298612+sambhavnoobcoder@users.noreply.github.com>
Date: Wed, 24 Jul 2024 05:00:45 +0530
Subject: [PATCH 057/155] Refactor SDA_downscale_preprocess for consistent date
 handling

- Convert input data names to Date objects using lubridate
- Use converted dates (input_date_names) for existence check and indexing
- Improve efficiency by avoiding redundant date conversions
---
 modules/assim.sequential/R/downscale_function.R | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/modules/assim.sequential/R/downscale_function.R b/modules/assim.sequential/R/downscale_function.R
index 8b929877e6d..7895a409e08 100644
--- a/modules/assim.sequential/R/downscale_function.R
+++ b/modules/assim.sequential/R/downscale_function.R
@@ -14,21 +14,27 @@
 
 # Preprocess function to check and clean the data
 SDA_downscale_preprocess <- function(data_path, coords_path, date, carbon_pool) {
+  # Read the input data and site coordinates
   input_data <- readRDS(data_path)
   site_coordinates <- readr::read_csv(coords_path)
   
+  # Convert input_data names to Date objects
   input_date_names <- lubridate::ymd(names(input_data))
   names(input_data) <- input_date_names
   
+  # Convert the input date to a Date object
   standard_date <- lubridate::ymd(date)
   
-  if (!standard_date %in% names(input_data)) {
+  # Ensure the date exists in the input data
+  if (!standard_date %in% input_date_names) {
     stop(paste("Date", date, "not found in the input data."))
   }
   
-  index <- which(names(input_data) == standard_date)
+  # Extract the carbon data for the specified focus year
+  index <- which(input_date_names == standard_date)
   data <- input_data[[index]]
   
+  # Ensure the carbon pool exists in the input data
   if (!carbon_pool %in% names(data)) {
     stop(paste("Carbon pool", carbon_pool, "not found in the input data."))
   }
@@ -36,10 +42,12 @@ SDA_downscale_preprocess <- function(data_path, coords_path, date, carbon_pool)
   carbon_data <- as.data.frame(t(data[which(names(data) == carbon_pool)]))
   names(carbon_data) <- paste0("ensemble", seq(ncol(carbon_data)))
   
+  # Ensure site coordinates have 'lon' and 'lat' columns
   if (!all(c("lon", "lat") %in% names(site_coordinates))) {
     stop("Site coordinates must contain 'lon' and 'lat' columns.")
   }
   
+  # Ensure the number of rows in site coordinates matches the number of rows in carbon data
   if (nrow(site_coordinates) != nrow(carbon_data)) {
     message("Number of rows in site coordinates does not match the number of rows in carbon data.")
     if (nrow(site_coordinates) > nrow(carbon_data)) {

From 62a8e4403b5b3bad3fa9a466179580b22829f6ac Mon Sep 17 00:00:00 2001
From: Sambhav Dixit <94298612+sambhavnoobcoder@users.noreply.github.com>
Date: Wed, 24 Jul 2024 13:59:13 +0530
Subject: [PATCH 058/155] Updated documentation to suit date type

- The date was previously handled as a character.
- Updated the documentation to correctly reflect that the type of date is now Date.
---
 modules/assim.sequential/R/downscale_function.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/modules/assim.sequential/R/downscale_function.R b/modules/assim.sequential/R/downscale_function.R
index 7895a409e08..5fc9fbb3036 100644
--- a/modules/assim.sequential/R/downscale_function.R
+++ b/modules/assim.sequential/R/downscale_function.R
@@ -4,7 +4,7 @@
 ##'
 ##' @param data_path Character. File path for .rds containing ensemble data.
 ##' @param coords_path Character. File path for .csv file containing the site coordinates, with columns named "lon" and "lat".
-##' @param date Character. If SDA site run, format is yyyy/mm/dd; if NEON, yyyy-mm-dd. Restricted to years within the file supplied to 'data_path'.
+##' @param date Date. If SDA site run, format is yyyy/mm/dd; if NEON, yyyy-mm-dd. Restricted to years within the file supplied to 'data_path'.
 ##' @param carbon_pool Character. Carbon pool of interest. Name must match the carbon pool name found within the file supplied to 'data_path'.
 ##' @details This function ensures that the specified date and carbon pool are present in the input data. It also checks the validity of the site coordinates and aligns the number of rows between site coordinates and carbon data.
 ##'
@@ -68,7 +68,7 @@ SDA_downscale_preprocess <- function(data_path, coords_path, date, carbon_pool)
 ##' @author Joshua Ploshay, Sambhav Dixit
 ##'
 ##' @param preprocessed List. Preprocessed data returned as an output from the SDA_downscale_preprocess function.
-##' @param date Character. If SDA site run, format is yyyy/mm/dd; if NEON, yyyy-mm-dd. Restricted to years within file supplied to 'data'.
+##' @param date Date. If SDA site run, format is yyyy/mm/dd; if NEON, yyyy-mm-dd. Restricted to years within file supplied to 'data'.
 ##' @param carbon_pool Character. Carbon pool of interest. Name must match carbon pool name found within file supplied to 'data'.
 ##' @param covariates SpatRaster stack. Used as predictors in CNN. Layers within stack should be named.
 ##' @param model_type Character. Either "rf" for Random Forest or "cnn" for Convolutional Neural Network. Default is Random Forest.

From 7f782f24b6d2532b7b7ac0efb7cd6441647e6cb6 Mon Sep 17 00:00:00 2001
From: Sambhav Dixit <94298612+sambhavnoobcoder@users.noreply.github.com>
Date: Wed, 24 Jul 2024 15:03:12 +0530
Subject: [PATCH 059/155] Update documentation for clarification of variable
 data

In several places in the documentation it was unclear what 'data' referred to, so I have added the necessary clarification to make explicit what 'data' refers to.
---
 modules/assim.sequential/R/downscale_function.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/modules/assim.sequential/R/downscale_function.R b/modules/assim.sequential/R/downscale_function.R
index 5fc9fbb3036..c4d66fd4de1 100644
--- a/modules/assim.sequential/R/downscale_function.R
+++ b/modules/assim.sequential/R/downscale_function.R
@@ -68,8 +68,8 @@ SDA_downscale_preprocess <- function(data_path, coords_path, date, carbon_pool)
 ##' @author Joshua Ploshay, Sambhav Dixit
 ##'
 ##' @param preprocessed List. Preprocessed data returned as an output from the SDA_downscale_preprocess function.
-##' @param date Date. If SDA site run, format is yyyy/mm/dd; if NEON, yyyy-mm-dd. Restricted to years within file supplied to 'data'.
-##' @param carbon_pool Character. Carbon pool of interest. Name must match carbon pool name found within file supplied to 'data'.
+##' @param date Date. If SDA site run, format is yyyy/mm/dd; if NEON, yyyy-mm-dd. Restricted to years within file supplied to 'preprocessed' from the 'data_path'.
+##' @param carbon_pool Character. Carbon pool of interest. Name must match carbon pool name found within file supplied to 'preprocessed' from the 'data_path'.
 ##' @param covariates SpatRaster stack. Used as predictors in CNN. Layers within stack should be named.
 ##' @param model_type Character. Either "rf" for Random Forest or "cnn" for Convolutional Neural Network. Default is Random Forest.
 ##' @param seed Numeric or NULL. Optional seed for random number generation. Default is NULL.

From 21a615a40ac6d2ad88d4fe938b715f6b07566acc Mon Sep 17 00:00:00 2001
From: Sambhav Dixit <94298612+sambhavnoobcoder@users.noreply.github.com>
Date: Wed, 24 Jul 2024 19:41:26 +0530
Subject: [PATCH 060/155] added namespace to functions

The namespaces were missing from some function calls, so that has been resolved in this PR.
---
 .../assim.sequential/R/downscale_function.R   | 144 +++++++++---------
 1 file changed, 72 insertions(+), 72 deletions(-)

diff --git a/modules/assim.sequential/R/downscale_function.R b/modules/assim.sequential/R/downscale_function.R
index c4d66fd4de1..56836a221c3 100644
--- a/modules/assim.sequential/R/downscale_function.R
+++ b/modules/assim.sequential/R/downscale_function.R
@@ -4,7 +4,7 @@
 ##'
 ##' @param data_path Character. File path for .rds containing ensemble data.
 ##' @param coords_path Character. File path for .csv file containing the site coordinates, with columns named "lon" and "lat".
-##' @param date Date. If SDA site run, format is yyyy/mm/dd; if NEON, yyyy-mm-dd. Restricted to years within the file supplied to 'data_path'.
+##' @param date Character. If SDA site run, format is yyyy/mm/dd; if NEON, yyyy-mm-dd. Restricted to years within the file supplied to 'data_path'.
 ##' @param carbon_pool Character. Carbon pool of interest. Name must match the carbon pool name found within the file supplied to 'data_path'.
 ##' @details This function ensures that the specified date and carbon pool are present in the input data. It also checks the validity of the site coordinates and aligns the number of rows between site coordinates and carbon data.
 ##'
@@ -15,52 +15,52 @@
 # Preprocess function to check and clean the data
 SDA_downscale_preprocess <- function(data_path, coords_path, date, carbon_pool) {
   # Read the input data and site coordinates
-  input_data <- readRDS(data_path)
+  input_data <- base::readRDS(data_path)
   site_coordinates <- readr::read_csv(coords_path)
   
   # Convert input_data names to Date objects
-  input_date_names <- lubridate::ymd(names(input_data))
-  names(input_data) <- input_date_names
+  input_date_names <- lubridate::ymd(base::names(input_data))
+  base::names(input_data) <- input_date_names
   
   # Convert the input date to a Date object
   standard_date <- lubridate::ymd(date)
   
   # Ensure the date exists in the input data
   if (!standard_date %in% input_date_names) {
-    stop(paste("Date", date, "not found in the input data."))
+    base::stop(base::paste("Date", date, "not found in the input data."))
   }
   
   # Extract the carbon data for the specified focus year
-  index <- which(input_date_names == standard_date)
+  index <- base::which(input_date_names == standard_date)
   data <- input_data[[index]]
   
   # Ensure the carbon pool exists in the input data
-  if (!carbon_pool %in% names(data)) {
-    stop(paste("Carbon pool", carbon_pool, "not found in the input data."))
+  if (!carbon_pool %in% base::names(data)) {
+    base::stop(base::paste("Carbon pool", carbon_pool, "not found in the input data."))
   }
   
-  carbon_data <- as.data.frame(t(data[which(names(data) == carbon_pool)]))
-  names(carbon_data) <- paste0("ensemble", seq(ncol(carbon_data)))
+  carbon_data <- base::as.data.frame(base::t(data[base::which(base::names(data) == carbon_pool)]))
+  base::names(carbon_data) <- base::paste0("ensemble", base::seq(base::ncol(carbon_data)))
   
   # Ensure site coordinates have 'lon' and 'lat' columns
-  if (!all(c("lon", "lat") %in% names(site_coordinates))) {
-    stop("Site coordinates must contain 'lon' and 'lat' columns.")
+  if (!base::all(c("lon", "lat") %in% base::names(site_coordinates))) {
+    base::stop("Site coordinates must contain 'lon' and 'lat' columns.")
   }
   
   # Ensure the number of rows in site coordinates matches the number of rows in carbon data
-  if (nrow(site_coordinates) != nrow(carbon_data)) {
-    message("Number of rows in site coordinates does not match the number of rows in carbon data.")
-    if (nrow(site_coordinates) > nrow(carbon_data)) {
-      message("Truncating site coordinates to match carbon data rows.")
-      site_coordinates <- site_coordinates[1:nrow(carbon_data), ]
+  if (base::nrow(site_coordinates) != base::nrow(carbon_data)) {
+    base::message("Number of rows in site coordinates does not match the number of rows in carbon data.")
+    if (base::nrow(site_coordinates) > base::nrow(carbon_data)) {
+      base::message("Truncating site coordinates to match carbon data rows.")
+      site_coordinates <- site_coordinates[1:base::nrow(carbon_data), ]
     } else {
-      message("Truncating carbon data to match site coordinates rows.")
-      carbon_data <- carbon_data[1:nrow(site_coordinates), ]
+      base::message("Truncating carbon data to match site coordinates rows.")
+      carbon_data <- carbon_data[1:base::nrow(site_coordinates), ]
     }
   }
   
-  message("Preprocessing completed successfully.")
-  return(list(input_data = input_data, site_coordinates = site_coordinates, carbon_data = carbon_data))
+  base::message("Preprocessing completed successfully.")
+  base::return(base::list(input_data = input_data, site_coordinates = site_coordinates, carbon_data = carbon_data))
 }
 
 ##' @title SDA Downscale Function
@@ -88,34 +88,34 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ
   site_coordinates <- terra::vect(site_coordinates, geom = c("lon", "lat"), crs = "EPSG:4326")
   
   # Extract predictors from covariates raster using site coordinates
-  predictors <- as.data.frame(terra::extract(covariates, site_coordinates, ID = FALSE))
+  predictors <- base::as.data.frame(terra::extract(covariates, site_coordinates, ID = FALSE))
   
   # Dynamically get covariate names
-  covariate_names <- names(predictors)
+  covariate_names <- base::names(predictors)
   
   # Create a single data frame with all predictors and ensemble data
-  full_data <- cbind(carbon_data, predictors)
+  full_data <- base::cbind(carbon_data, predictors)
   
   # Split the observations into training and testing sets
-  if (!is.null(seed)) {
-    set.seed(seed)  # Only set seed if provided
+  if (!base::is.null(seed)) {
+    base::set.seed(seed)  # Only set seed if provided
   }
-  sample <- sample(1:nrow(full_data), size = round(0.75 * nrow(full_data)))
+  sample <- base::sample(1:base::nrow(full_data), size = base::round(0.75 * base::nrow(full_data)))
   train_data <- full_data[sample, ]
   test_data <- full_data[-sample, ]
   
   # Prepare data for both RF and CNN
-  x_data <- as.matrix(full_data[, covariate_names])
-  y_data <- as.matrix(carbon_data)
+  x_data <- base::as.matrix(full_data[, covariate_names])
+  y_data <- base::as.matrix(carbon_data)
   
   # Calculate scaling parameters from all data
-  scaling_params <- list(
-    mean = colMeans(x_data),
-    sd = apply(x_data, 2, sd)
+  scaling_params <- base::list(
+    mean = base::colMeans(x_data),
+    sd = base::apply(x_data, 2, stats::sd)
   )
   
   # Normalize the data
-  x_data_scaled <- scale(x_data, center = scaling_params$mean, scale = scaling_params$sd)
+  x_data_scaled <- base::scale(x_data, center = scaling_params$mean, scale = scaling_params$sd)
   
   # Split into training and testing sets
   x_train <- x_data_scaled[sample, ]
@@ -125,10 +125,10 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ
   
   if (model_type == "rf") {
     # Train a random forest model for each ensemble member using the training data
-    rf_output <- list()
-    for (i in seq_along(carbon_data)) {
-      ensemble_col <- paste0("ensemble", i)
-      formula <- as.formula(paste(ensemble_col, "~", paste(covariate_names, collapse = " + ")))
+    rf_output <- base::list()
+    for (i in base::seq_along(carbon_data)) {
+      ensemble_col <- base::paste0("ensemble", i)
+      formula <- stats::as.formula(base::paste(ensemble_col, "~", base::paste(covariate_names, collapse = " + ")))
       rf_output[[i]] <- randomForest::randomForest(formula,
                                                    data = train_data,
                                                    ntree = 1000,
@@ -138,16 +138,16 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ
     }
     
     # Generate predictions (maps) for each ensemble member using the trained models
-    maps <- list()
-    predictions <- list()
-    for (i in seq_along(rf_output)) {
+    maps <- base::list()
+    predictions <- base::list()
+    for (i in base::seq_along(rf_output)) {
       maps[[i]] <- terra::predict(covariates, model = rf_output[[i]], na.rm = TRUE)
-      predictions[[i]] <- predict(rf_output[[i]], test_data)
+      predictions[[i]] <- stats::predict(rf_output[[i]], test_data)
     }
     
     # Organize the results into a single output list
-    downscale_output <- list(
-      data = list(training = train_data, testing = test_data),
+    downscale_output <- base::list(
+      data = base::list(training = train_data, testing = test_data),
       models = rf_output,
       maps = maps,
       predictions = predictions
@@ -155,15 +155,15 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ
     
   } else if (model_type == "cnn") {
     # Reshape data for CNN input (samples, timesteps, features)
-    x_train <- array_reshape(x_train, c(nrow(x_train), 1, ncol(x_train)))
-    x_test <- array_reshape(x_test, c(nrow(x_test), 1, ncol(x_test)))
+    x_train <- array_reshape(x_train, c(base::nrow(x_train), 1, base::ncol(x_train)))
+    x_test <- array_reshape(x_test, c(base::nrow(x_test), 1, base::ncol(x_test)))
     
     # Train a CNN model for each ensemble member
-    cnn_output <- list()
-    for (i in seq_along(carbon_data)) {
+    cnn_output <- base::list()
+    for (i in base::seq_along(carbon_data)) {
       # Define the CNN model
       model <- keras_model_sequential() |>
-        layer_conv_1d(filters = 64, kernel_size = 1, activation = 'relu', input_shape = c(1, length(covariate_names))) |>
+        layer_conv_1d(filters = 64, kernel_size = 1, activation = 'relu', input_shape = c(1, base::length(covariate_names))) |>
         layer_flatten() |>
         layer_dense(units = 64, activation = 'relu') |>
         layer_dense(units = 1)
@@ -190,16 +190,16 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ
     
     # Custom predict function for CNN
     cnn_predict <- function(model, newdata, scaling_params) {
-      newdata <- scale(newdata, center = scaling_params$mean, scale = scaling_params$sd)
-      newdata <- array_reshape(newdata, c(nrow(newdata), 1, ncol(newdata)))
-      predictions <- predict(model, newdata)
-      return(as.vector(predictions))
+      newdata <- base::scale(newdata, center = scaling_params$mean, scale = scaling_params$sd)
+      newdata <- array_reshape(newdata, c(base::nrow(newdata), 1, base::ncol(newdata)))
+      predictions <- stats::predict(model, newdata)
+      base::return(base::as.vector(predictions))
     }
     
     # Generate predictions (maps) for each ensemble member using the trained models
-    maps <- list()
-    predictions <- list()
-    for (i in seq_along(cnn_output)) {
+    maps <- base::list()
+    predictions <- base::list()
+    for (i in base::seq_along(cnn_output)) {
       # Create a SpatRaster with the same properties as covariates
       prediction_rast <- terra::rast(covariates)
       
@@ -213,25 +213,25 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ
     }
     
     # Organize the results into a single output list
-    downscale_output <- list(
-      data = list(training = train_data, testing = test_data),
+    downscale_output <- base::list(
+      data = base::list(training = train_data, testing = test_data),
       models = cnn_output,
       maps = maps,
       predictions = predictions,
       scaling_params = scaling_params
     )
   } else {
-    stop("Invalid model_type. Please choose either 'rf' for Random Forest or 'cnn' for Convolutional Neural Network.")
+    base::stop("Invalid model_type. Please choose either 'rf' for Random Forest or 'cnn' for Convolutional Neural Network.")
   }
   
   # Rename each element of the output list with appropriate ensemble numbers
-  for (i in seq_along(carbon_data)) {
-    names(downscale_output$models)[i] <- paste0("ensemble", i)
-    names(downscale_output$maps)[i] <- paste0("ensemble", i)
-    names(downscale_output$predictions)[i] <- paste0("ensemble", i)
+  for (i in base::seq_along(carbon_data)) {
+    base::names(downscale_output$models)[i] <- base::paste0("ensemble", i)
+    base::names(downscale_output$maps)[i] <- base::paste0("ensemble", i)
+    base::names(downscale_output$predictions)[i] <- base::paste0("ensemble", i)
   }
   
-  return(downscale_output)
+  base::return(downscale_output)
 }
 
 ##' @title Calculate Metrics for Downscaling Results
@@ -248,20 +248,20 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ
 ##' @return A list of metrics for each ensemble, where each element contains MAE , MSE ,R_squared ,actual values from testing data and predicted values for the testing data 
 
 SDA_downscale_metrics <- function(downscale_output, carbon_pool) {
-  metrics <- list()
+  metrics <- base::list()
   
-  for (i in 1:length(downscale_output$data)) {
-    actual <- downscale_output$data[[i]]$testing[[paste0(carbon_pool, "_ens", i)]]
+  for (i in 1:base::length(downscale_output$data)) {
+    actual <- downscale_output$data[[i]]$testing[[base::paste0(carbon_pool, "_ens", i)]]
     predicted <- downscale_output$predictions[[i]]
     
-    mse <- mean((actual - predicted)^2)
-    mae <- mean(abs(actual - predicted))
-    r_squared <- 1 - sum((actual - predicted)^2) / sum((actual - mean(actual))^2)
+    mse <- base::mean((actual - predicted)^2)
+    mae <- base::mean(base::abs(actual - predicted))
+    r_squared <- 1 - base::sum((actual - predicted)^2) / base::sum((actual - base::mean(actual))^2)
     
-    metrics[[i]] <- list(MSE = mse, MAE = mae, R_squared = r_squared, actual = actual, predicted = predicted)
+    metrics[[i]] <- base::list(MSE = mse, MAE = mae, R_squared = r_squared, actual = actual, predicted = predicted)
   }
   
-  names(metrics) <- paste0("ensemble", seq_along(metrics))
+  base::names(metrics) <- base::paste0("ensemble", base::seq_along(metrics))
   
-  return(metrics)
+  base::return(metrics)
 }

From c8c234a9bff5d484f0c6e62a110765520ce21807 Mon Sep 17 00:00:00 2001
From: Sambhav Dixit <94298612+sambhavnoobcoder@users.noreply.github.com>
Date: Wed, 24 Jul 2024 19:59:32 +0530
Subject: [PATCH 061/155] Unify output structure for RF and CNN models in
 SDA_downscale function

- Replaced separate rf_output and cnn_output lists with a single 'models' list
- Standardized the structure of downscale_output for both RF and CNN options
- Included scaling_params in the output for both model types
- Refactored main function body to use consistent lists (models, maps, predictions) for both RF and CNN
- Moved cnn_predict function inside the CNN loop for better encapsulation
- Ensured consistent naming and structure of output elements across model types
---
 .../assim.sequential/R/downscale_function.R   | 95 +++++++------------
 1 file changed, 34 insertions(+), 61 deletions(-)

diff --git a/modules/assim.sequential/R/downscale_function.R b/modules/assim.sequential/R/downscale_function.R
index 56836a221c3..8e5bdf50088 100644
--- a/modules/assim.sequential/R/downscale_function.R
+++ b/modules/assim.sequential/R/downscale_function.R
@@ -123,59 +123,42 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ
   y_train <- y_data[sample, ]
   y_test <- y_data[-sample, ]
   
+  # Initialize lists for outputs
+  models <- base::list()
+  maps <- base::list()
+  predictions <- base::list()
+  
   if (model_type == "rf") {
-    # Train a random forest model for each ensemble member using the training data
-    rf_output <- base::list()
     for (i in base::seq_along(carbon_data)) {
       ensemble_col <- base::paste0("ensemble", i)
       formula <- stats::as.formula(base::paste(ensemble_col, "~", base::paste(covariate_names, collapse = " + ")))
-      rf_output[[i]] <- randomForest::randomForest(formula,
-                                                   data = train_data,
-                                                   ntree = 1000,
-                                                   na.action = stats::na.omit,
-                                                   keep.forest = TRUE,
-                                                   importance = TRUE)
-    }
-    
-    # Generate predictions (maps) for each ensemble member using the trained models
-    maps <- base::list()
-    predictions <- base::list()
-    for (i in base::seq_along(rf_output)) {
-      maps[[i]] <- terra::predict(covariates, model = rf_output[[i]], na.rm = TRUE)
-      predictions[[i]] <- stats::predict(rf_output[[i]], test_data)
+      models[[i]] <- randomForest::randomForest(formula,
+                                                data = train_data,
+                                                ntree = 1000,
+                                                na.action = stats::na.omit,
+                                                keep.forest = TRUE,
+                                                importance = TRUE)
+      
+      maps[[i]] <- terra::predict(covariates, model = models[[i]], na.rm = TRUE)
+      predictions[[i]] <- stats::predict(models[[i]], test_data)
     }
-    
-    # Organize the results into a single output list
-    downscale_output <- base::list(
-      data = base::list(training = train_data, testing = test_data),
-      models = rf_output,
-      maps = maps,
-      predictions = predictions
-    )
-    
   } else if (model_type == "cnn") {
-    # Reshape data for CNN input (samples, timesteps, features)
     x_train <- array_reshape(x_train, c(base::nrow(x_train), 1, base::ncol(x_train)))
     x_test <- array_reshape(x_test, c(base::nrow(x_test), 1, base::ncol(x_test)))
     
-    # Train a CNN model for each ensemble member
-    cnn_output <- base::list()
     for (i in base::seq_along(carbon_data)) {
-      # Define the CNN model
       model <- keras_model_sequential() |>
         layer_conv_1d(filters = 64, kernel_size = 1, activation = 'relu', input_shape = c(1, base::length(covariate_names))) |>
         layer_flatten() |>
         layer_dense(units = 64, activation = 'relu') |>
         layer_dense(units = 1)
       
-      # Compile the model
       model |> compile(
         loss = 'mean_squared_error',
         optimizer = optimizer_adam(),
         metrics = c('mean_absolute_error')
       )
       
-      # Train the model
       model |> fit(
         x = x_train,
         y = y_train[, i],
@@ -185,45 +168,35 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ
         verbose = 0
       )
       
-      cnn_output[[i]] <- model
-    }
-    
-    # Custom predict function for CNN
-    cnn_predict <- function(model, newdata, scaling_params) {
-      newdata <- base::scale(newdata, center = scaling_params$mean, scale = scaling_params$sd)
-      newdata <- array_reshape(newdata, c(base::nrow(newdata), 1, base::ncol(newdata)))
-      predictions <- stats::predict(model, newdata)
-      base::return(base::as.vector(predictions))
-    }
-    
-    # Generate predictions (maps) for each ensemble member using the trained models
-    maps <- base::list()
-    predictions <- base::list()
-    for (i in base::seq_along(cnn_output)) {
-      # Create a SpatRaster with the same properties as covariates
-      prediction_rast <- terra::rast(covariates)
+      models[[i]] <- model
+      
+      cnn_predict <- function(model, newdata, scaling_params) {
+        newdata <- base::scale(newdata, center = scaling_params$mean, scale = scaling_params$sd)
+        newdata <- array_reshape(newdata, c(base::nrow(newdata), 1, base::ncol(newdata)))
+        predictions <- stats::predict(model, newdata)
+        base::return(base::as.vector(predictions))
+      }
       
-      # Use terra::predict to apply the CNN model
-      maps[[i]] <- terra::predict(prediction_rast, model = cnn_output[[i]],
+      prediction_rast <- terra::rast(covariates)
+      maps[[i]] <- terra::predict(prediction_rast, model = models[[i]],
                                   fun = cnn_predict,
                                   scaling_params = scaling_params)
       
-      # Generate predictions for testing data
-      predictions[[i]] <- cnn_predict(cnn_output[[i]], x_data[-sample, ], scaling_params)
+      predictions[[i]] <- cnn_predict(models[[i]], x_data[-sample, ], scaling_params)
     }
-    
-    # Organize the results into a single output list
-    downscale_output <- base::list(
-      data = base::list(training = train_data, testing = test_data),
-      models = cnn_output,
-      maps = maps,
-      predictions = predictions,
-      scaling_params = scaling_params
-    )
   } else {
     base::stop("Invalid model_type. Please choose either 'rf' for Random Forest or 'cnn' for Convolutional Neural Network.")
   }
   
+  # Organize the results into a single output list
+  downscale_output <- base::list(
+    data = base::list(training = train_data, testing = test_data),
+    models = models,
+    maps = maps,
+    predictions = predictions,
+    scaling_params = scaling_params
+  )
+  
   # Rename each element of the output list with appropriate ensemble numbers
   for (i in base::seq_along(carbon_data)) {
     base::names(downscale_output$models)[i] <- base::paste0("ensemble", i)

From 19402db78ed72af08315f1ef71aa5091dc569574 Mon Sep 17 00:00:00 2001
From: Sambhav Dixit <94298612+sambhavnoobcoder@users.noreply.github.com>
Date: Wed, 24 Jul 2024 20:05:52 +0530
Subject: [PATCH 062/155] removed extra description for preprocess function

- Removed the redundant plain comment above the SDA_downscale_preprocess function
- This description was already clear from the function's parameters and usage
- Refactoring helps prevent potential issues with roxygen documentation
- Improves overall code clarity and maintainability
---
 modules/assim.sequential/R/downscale_function.R | 1 -
 1 file changed, 1 deletion(-)

diff --git a/modules/assim.sequential/R/downscale_function.R b/modules/assim.sequential/R/downscale_function.R
index 8e5bdf50088..e82d192da70 100644
--- a/modules/assim.sequential/R/downscale_function.R
+++ b/modules/assim.sequential/R/downscale_function.R
@@ -12,7 +12,6 @@
 ##'
 ##' @return A list containing The read .rds data , The cleaned site coordinates, and the preprocessed carbon data.
 
-# Preprocess function to check and clean the data
 SDA_downscale_preprocess <- function(data_path, coords_path, date, carbon_pool) {
   # Read the input data and site coordinates
   input_data <- base::readRDS(data_path)

From f43a50a4b410396b1b3e38dac8d1a37753dbd8d7 Mon Sep 17 00:00:00 2001
From: Sambhav Dixit <94298612+sambhavnoobcoder@users.noreply.github.com>
Date: Wed, 24 Jul 2024 20:15:46 +0530
Subject: [PATCH 063/155] Changed the documentation for predictors for
 downscale instead of CNN

The earlier documentation for the covariates SpatRaster described it as a predictor for the CNN only, but it is used as a predictor for downscaling in general. Reworded the parameter description accordingly.
---
 modules/assim.sequential/R/downscale_function.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/assim.sequential/R/downscale_function.R b/modules/assim.sequential/R/downscale_function.R
index e82d192da70..c24a0f404a1 100644
--- a/modules/assim.sequential/R/downscale_function.R
+++ b/modules/assim.sequential/R/downscale_function.R
@@ -69,7 +69,7 @@ SDA_downscale_preprocess <- function(data_path, coords_path, date, carbon_pool)
 ##' @param preprocessed List. Preprocessed data returned as an output from the SDA_downscale_preprocess function.
 ##' @param date Date. If SDA site run, format is yyyy/mm/dd; if NEON, yyyy-mm-dd. Restricted to years within file supplied to 'preprocessed' from the 'data_path'.
 ##' @param carbon_pool Character. Carbon pool of interest. Name must match carbon pool name found within file supplied to 'preprocessed' from the 'data_path'.
-##' @param covariates SpatRaster stack. Used as predictors in CNN. Layers within stack should be named.
+##' @param covariates SpatRaster stack. Used as predictors in downscaling. Layers within stack should be named. Recommended that this stack be generated using 'covariates' instructions in assim.sequential/inst folder
 ##' @param model_type Character. Either "rf" for Random Forest or "cnn" for Convolutional Neural Network. Default is Random Forest.
 ##' @param seed Numeric or NULL. Optional seed for random number generation. Default is NULL.
 ##' @details This function will downscale forecast data to unmodeled locations using covariates and site locations

From 62221d94a590d555e27af1eb6fe135a633ed4cc5 Mon Sep 17 00:00:00 2001
From: Sambhav Dixit <94298612+sambhavnoobcoder@users.noreply.github.com>
Date: Wed, 24 Jul 2024 20:23:56 +0530
Subject: [PATCH 064/155] Update
 modules/assim.sequential/R/downscale_function.R

Co-authored-by: Michael Dietze <dietze@bu.edu>
---
 modules/assim.sequential/R/downscale_function.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/assim.sequential/R/downscale_function.R b/modules/assim.sequential/R/downscale_function.R
index c24a0f404a1..d48e9227dce 100644
--- a/modules/assim.sequential/R/downscale_function.R
+++ b/modules/assim.sequential/R/downscale_function.R
@@ -84,7 +84,7 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ
   carbon_data <- preprocessed$carbon_data
   
   # Convert site coordinates to SpatVector
-  site_coordinates <- terra::vect(site_coordinates, geom = c("lon", "lat"), crs = "EPSG:4326")
+  site_coordinates <- terra::vect(preprocessed$site_coordinates, geom = c("lon", "lat"), crs = "EPSG:4326")
   
   # Extract predictors from covariates raster using site coordinates
   predictors <- base::as.data.frame(terra::extract(covariates, site_coordinates, ID = FALSE))

From 0af7df7f0b46e2f37beab6ac10b64b7fcf88aa15 Mon Sep 17 00:00:00 2001
From: Sambhav Dixit <94298612+sambhavnoobcoder@users.noreply.github.com>
Date: Wed, 24 Jul 2024 20:32:13 +0530
Subject: [PATCH 065/155] Update
 modules/assim.sequential/R/downscale_function.R

Co-authored-by: Michael Dietze <dietze@bu.edu>
---
 modules/assim.sequential/R/downscale_function.R | 1 -
 1 file changed, 1 deletion(-)

diff --git a/modules/assim.sequential/R/downscale_function.R b/modules/assim.sequential/R/downscale_function.R
index d48e9227dce..b488d9cd66d 100644
--- a/modules/assim.sequential/R/downscale_function.R
+++ b/modules/assim.sequential/R/downscale_function.R
@@ -80,7 +80,6 @@ SDA_downscale_preprocess <- function(data_path, coords_path, date, carbon_pool)
 
 SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_type = "rf", seed = NULL) {
   input_data <- preprocessed$input_data
-  site_coordinates <- preprocessed$site_coordinates
   carbon_data <- preprocessed$carbon_data
   
   # Convert site coordinates to SpatVector

From 1e6a4845e1e10edeec54c59a5f91033684d293d1 Mon Sep 17 00:00:00 2001
From: Sambhav Dixit <94298612+sambhavnoobcoder@users.noreply.github.com>
Date: Wed, 24 Jul 2024 20:32:57 +0530
Subject: [PATCH 066/155] Update
 modules/assim.sequential/R/downscale_function.R

Co-authored-by: Michael Dietze <dietze@bu.edu>
---
 modules/assim.sequential/R/downscale_function.R | 1 -
 1 file changed, 1 deletion(-)

diff --git a/modules/assim.sequential/R/downscale_function.R b/modules/assim.sequential/R/downscale_function.R
index b488d9cd66d..9cf58789184 100644
--- a/modules/assim.sequential/R/downscale_function.R
+++ b/modules/assim.sequential/R/downscale_function.R
@@ -79,7 +79,6 @@ SDA_downscale_preprocess <- function(data_path, coords_path, date, carbon_pool)
 ##' @return A list containing the training and testing data sets, models, predicted maps for each ensemble member, and predictions for testing data.
 
 SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_type = "rf", seed = NULL) {
-  input_data <- preprocessed$input_data
   carbon_data <- preprocessed$carbon_data
   
   # Convert site coordinates to SpatVector

From 529fe6f783cc1a2b77cff24676268cbeb20d8c56 Mon Sep 17 00:00:00 2001
From: Sambhav Dixit <94298612+sambhavnoobcoder@users.noreply.github.com>
Date: Wed, 24 Jul 2024 20:33:54 +0530
Subject: [PATCH 067/155] update carbon_data call

Co-authored-by: Michael Dietze <dietze@bu.edu>
---
 modules/assim.sequential/R/downscale_function.R | 1 -
 1 file changed, 1 deletion(-)

diff --git a/modules/assim.sequential/R/downscale_function.R b/modules/assim.sequential/R/downscale_function.R
index 9cf58789184..86d918273dd 100644
--- a/modules/assim.sequential/R/downscale_function.R
+++ b/modules/assim.sequential/R/downscale_function.R
@@ -79,7 +79,6 @@ SDA_downscale_preprocess <- function(data_path, coords_path, date, carbon_pool)
 ##' @return A list containing the training and testing data sets, models, predicted maps for each ensemble member, and predictions for testing data.
 
 SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_type = "rf", seed = NULL) {
-  carbon_data <- preprocessed$carbon_data
   
   # Convert site coordinates to SpatVector
   site_coordinates <- terra::vect(preprocessed$site_coordinates, geom = c("lon", "lat"), crs = "EPSG:4326")

From 2227fd9380d4b9808b355bf56f03b3a7b8a1f71a Mon Sep 17 00:00:00 2001
From: Sambhav Dixit <94298612+sambhavnoobcoder@users.noreply.github.com>
Date: Wed, 24 Jul 2024 20:36:41 +0530
Subject: [PATCH 068/155] updated full_data preprocess call

Instead of reading carbon_data from a local variable, full_data is now built directly from the carbon_data element of the preprocessed object.
---
 modules/assim.sequential/R/downscale_function.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/assim.sequential/R/downscale_function.R b/modules/assim.sequential/R/downscale_function.R
index 86d918273dd..44da0d8ace0 100644
--- a/modules/assim.sequential/R/downscale_function.R
+++ b/modules/assim.sequential/R/downscale_function.R
@@ -90,7 +90,7 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ
   covariate_names <- base::names(predictors)
   
   # Create a single data frame with all predictors and ensemble data
-  full_data <- base::cbind(carbon_data, predictors)
+  full_data <- base::cbind(preprocessed$carbon_data, predictors)
   
   # Split the observations into training and testing sets
   if (!base::is.null(seed)) {

From 80e5b2d7b69048f13a72b20eb92c443b5e6c668a Mon Sep 17 00:00:00 2001
From: Sambhav Dixit <94298612+sambhavnoobcoder@users.noreply.github.com>
Date: Wed, 24 Jul 2024 20:53:12 +0530
Subject: [PATCH 069/155] Revert "update carbon_data call"

This reverts commits 529fe6f and 2227fd9.

Reason for reversion:
The updated version, which attempted to call full_data directly from the
preprocessed object, resulted in unexpected errors. Specifically:

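1. Direct access to preprocessed$carbon_data within cbind() caused issues,
   possibly due to complex object structure or scoping problems. A likely
   contributing cause: the local carbon_data variable had been removed, but
   other code in SDA_downscale (e.g. the seq_along(carbon_data) loops) still
   refers to it.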
2. The original version, which first assigns carbon_data to a local variable,
   works correctly by creating a local copy that's easily accessible.
3. This reversion ensures the function's stability and correct data handling,
   maintaining the integrity of our downscaling process.

While the intent was to streamline the code, the original implementation
proves more robust in handling the preprocessed object's structure.
---
 modules/assim.sequential/R/downscale_function.R | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/modules/assim.sequential/R/downscale_function.R b/modules/assim.sequential/R/downscale_function.R
index 44da0d8ace0..9cf58789184 100644
--- a/modules/assim.sequential/R/downscale_function.R
+++ b/modules/assim.sequential/R/downscale_function.R
@@ -79,6 +79,7 @@ SDA_downscale_preprocess <- function(data_path, coords_path, date, carbon_pool)
 ##' @return A list containing the training and testing data sets, models, predicted maps for each ensemble member, and predictions for testing data.
 
 SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_type = "rf", seed = NULL) {
+  carbon_data <- preprocessed$carbon_data
   
   # Convert site coordinates to SpatVector
   site_coordinates <- terra::vect(preprocessed$site_coordinates, geom = c("lon", "lat"), crs = "EPSG:4326")
@@ -90,7 +91,7 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ
   covariate_names <- base::names(predictors)
   
   # Create a single data frame with all predictors and ensemble data
-  full_data <- base::cbind(preprocessed$carbon_data, predictors)
+  full_data <- base::cbind(carbon_data, predictors)
   
   # Split the observations into training and testing sets
   if (!base::is.null(seed)) {

From 9f6554b372703f74c54c82579347e5f33f581aa5 Mon Sep 17 00:00:00 2001
From: Sambhav Dixit <94298612+sambhavnoobcoder@users.noreply.github.com>
Date: Wed, 24 Jul 2024 21:11:54 +0530
Subject: [PATCH 070/155] Update SDA_downscale.Rd documentation

Updated SDA_downscale.Rd so that it matches the current version of the code and its roxygen comments.
---
 modules/assim.sequential/man/SDA_downscale.Rd | 29 ++++++++++---------
 1 file changed, 15 insertions(+), 14 deletions(-)

diff --git a/modules/assim.sequential/man/SDA_downscale.Rd b/modules/assim.sequential/man/SDA_downscale.Rd
index 79e05cf9386..89f6810866b 100644
--- a/modules/assim.sequential/man/SDA_downscale.Rd
+++ b/modules/assim.sequential/man/SDA_downscale.Rd
@@ -2,38 +2,39 @@
 % Please edit documentation in R/downscale_function.R
 \name{SDA_downscale}
 \alias{SDA_downscale}
-\title{North America Downscale Function}
+\title{SDA Downscale Function}
 \usage{
-SDA_downscale(preprocessed, date, carbon_pool, covariates, model_type)
+SDA_downscale(
+  preprocessed,
+  date,
+  carbon_pool,
+  covariates,
+  model_type = "rf",
+  seed = NULL
+)
 }
 \arguments{
 \item{preprocessed}{List. Preprocessed data returned as an output from the SDA_downscale_preprocess function.}
 
-\item{date}{Character. If SDA site run, format is yyyy/mm/dd; if NEON, yyyy-mm-dd. Restricted to years within file supplied to 'data'.}
+\item{date}{Date. If SDA site run, format is yyyy/mm/dd; if NEON, yyyy-mm-dd. Restricted to years within file supplied to 'preprocessed' from the 'data_path'.}
 
-\item{carbon_pool}{Character. Carbon pool of interest. Name must match carbon pool name found within file supplied to 'data'.}
+\item{carbon_pool}{Character. Carbon pool of interest. Name must match carbon pool name found within file supplied to 'preprocessed' from the 'data_path'.}
 
-\item{covariates}{SpatRaster stack. Used as predictors in CNN. Layers within stack should be named.}
+\item{covariates}{SpatRaster stack. Used as predictors in downscaling. Layers within stack should be named. Recommended that this stack be generated using 'covariates' instructions in assim.sequential/inst folder}
 
-\item{model_type}{Character. Either "rf" for Random Forest or "cnn" for Convolutional Neural Network.}
+\item{model_type}{Character. Either "rf" for Random Forest or "cnn" for Convolutional Neural Network. Default is Random Forest.}
+
+\item{seed}{Numeric or NULL. Optional seed for random number generation. Default is NULL.}
 }
 \value{
-It returns the `downscale_output` list containing lists for the training and testing data sets, models, and predicted maps for each ensemble member.
-
 A list containing the training and testing data sets, models, predicted maps for each ensemble member, and predictions for testing data.
 }
 \description{
-This function uses either Random Forest or Convolutional Neural Network model based on the model_type parameter.
-
 This function uses either Random Forest or Convolutional Neural Network model based on the model_type parameter.
 }
 \details{
-This function will downscale forecast data to unmodeled locations using covariates and site locations
-
 This function will downscale forecast data to unmodeled locations using covariates and site locations
 }
 \author{
-Joshua Ploshay , Sambhav Dixit
-
 Joshua Ploshay, Sambhav Dixit
 }

From 6001fad29e8b7beabc101e8e53503d766a28983e Mon Sep 17 00:00:00 2001
From: Sambhav Dixit <94298612+sambhavnoobcoder@users.noreply.github.com>
Date: Wed, 24 Jul 2024 21:18:30 +0530
Subject: [PATCH 071/155] Change date type to Date in preprocess function

This one was missed earlier; noticed it during a final review before committing, so updating it as well.
---
 modules/assim.sequential/R/downscale_function.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/assim.sequential/R/downscale_function.R b/modules/assim.sequential/R/downscale_function.R
index 9cf58789184..9ed6be4ef58 100644
--- a/modules/assim.sequential/R/downscale_function.R
+++ b/modules/assim.sequential/R/downscale_function.R
@@ -4,7 +4,7 @@
 ##'
 ##' @param data_path Character. File path for .rds containing ensemble data.
 ##' @param coords_path Character. File path for .csv file containing the site coordinates, with columns named "lon" and "lat".
-##' @param date Character. If SDA site run, format is yyyy/mm/dd; if NEON, yyyy-mm-dd. Restricted to years within the file supplied to 'data_path'.
+##' @param date Date. If SDA site run, format is yyyy/mm/dd; if NEON, yyyy-mm-dd. Restricted to years within the file supplied to 'data_path'.
 ##' @param carbon_pool Character. Carbon pool of interest. Name must match the carbon pool name found within the file supplied to 'data_path'.
 ##' @details This function ensures that the specified date and carbon pool are present in the input data. It also checks the validity of the site coordinates and aligns the number of rows between site coordinates and carbon data.
 ##'

From 909ae686c1bcdeaf58ef2089476384613b49ec7c Mon Sep 17 00:00:00 2001
From: Sambhav Dixit <94298612+sambhavnoobcoder@users.noreply.github.com>
Date: Wed, 24 Jul 2024 21:23:36 +0530
Subject: [PATCH 072/155] Update SDA_downscale.Rd

updated it after a fix to the code

From 9c084656565be8266e0a5df8a980ffb21d818ec0 Mon Sep 17 00:00:00 2001
From: Sambhav Dixit <94298612+sambhavnoobcoder@users.noreply.github.com>
Date: Wed, 24 Jul 2024 21:24:50 +0530
Subject: [PATCH 073/155] Update SDA_downscale_preprocess.Rd

Updated after the documented class of the date parameter changed from Character to Date, along with other accumulated changes.
---
 modules/assim.sequential/man/SDA_downscale_preprocess.Rd | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/assim.sequential/man/SDA_downscale_preprocess.Rd b/modules/assim.sequential/man/SDA_downscale_preprocess.Rd
index 0bed11f4ba9..0e2a9f70bfe 100644
--- a/modules/assim.sequential/man/SDA_downscale_preprocess.Rd
+++ b/modules/assim.sequential/man/SDA_downscale_preprocess.Rd
@@ -11,7 +11,7 @@ SDA_downscale_preprocess(data_path, coords_path, date, carbon_pool)
 
 \item{coords_path}{Character. File path for .csv file containing the site coordinates, with columns named "lon" and "lat".}
 
-\item{date}{Character. If SDA site run, format is yyyy/mm/dd; if NEON, yyyy-mm-dd. Restricted to years within the file supplied to 'data_path'.}
+\item{date}{Date. If SDA site run, format is yyyy/mm/dd; if NEON, yyyy-mm-dd. Restricted to years within the file supplied to 'data_path'.}
 
 \item{carbon_pool}{Character. Carbon pool of interest. Name must match the carbon pool name found within the file supplied to 'data_path'.}
 }

From 388939742f559760d6fbfc1e8b51596858462fba Mon Sep 17 00:00:00 2001
From: Sambhav Dixit <94298612+sambhavnoobcoder@users.noreply.github.com>
Date: Wed, 24 Jul 2024 21:26:50 +0530
Subject: [PATCH 074/155] Delete calculate_metrics.Rd

Deleted calculate_metrics.Rd, since it became redundant after being replaced by SDA_downscale_metrics.Rd.
---
 .../assim.sequential/man/calculate_metrics.Rd | 25 -------------------
 1 file changed, 25 deletions(-)
 delete mode 100644 modules/assim.sequential/man/calculate_metrics.Rd

diff --git a/modules/assim.sequential/man/calculate_metrics.Rd b/modules/assim.sequential/man/calculate_metrics.Rd
deleted file mode 100644
index f603c84707f..00000000000
--- a/modules/assim.sequential/man/calculate_metrics.Rd
+++ /dev/null
@@ -1,25 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/downscale_function.R
-\name{calculate_metrics}
-\alias{calculate_metrics}
-\title{Calculate Metrics for Downscaling Results}
-\usage{
-calculate_metrics(downscale_output, carbon_pool)
-}
-\arguments{
-\item{downscale_output}{List. Output from the SDA_downscale function, containing data, models, maps, and predictions for each ensemble.}
-
-\item{carbon_pool}{Character. Name of the carbon pool used in the downscaling process.}
-}
-\value{
-A list of metrics for each ensemble, where each element contains MAE , MSE ,R_squared ,actual values from testing data and predicted values for the testing data
-}
-\description{
-This function takes the output from the SDA_downscale function and computes various performance metrics for each ensemble. It provides a way to evaluate the accuracy of the downscaling results without modifying the main downscaling function.
-}
-\details{
-This function calculates performance metrics for the downscaling results. It computes Mean Squared Error (MSE), Mean Absolute Error (MAE), and R-squared for each ensemble. The function uses the actual values from the testing data and the predictions generated during the downscaling process.
-}
-\author{
-Sambhav Dixit
-}

From 71c1013e6816457afa3f95a4180f73f8c869b4f9 Mon Sep 17 00:00:00 2001
From: Sambhav Dixit <94298612+sambhavnoobcoder@users.noreply.github.com>
Date: Wed, 24 Jul 2024 21:33:58 +0530
Subject: [PATCH 075/155] Create SDA_downscale_metrics.Rd

Added SDA_downscale_metrics.Rd as the replacement for calculate_metrics.Rd, which was deleted from the branch in the previous commit.
---
 .../man/SDA_downscale_metrics.Rd              | 25 +++++++++++++++++++
 1 file changed, 25 insertions(+)
 create mode 100644 modules/assim.sequential/man/SDA_downscale_metrics.Rd

diff --git a/modules/assim.sequential/man/SDA_downscale_metrics.Rd b/modules/assim.sequential/man/SDA_downscale_metrics.Rd
new file mode 100644
index 00000000000..6ee30bb0c8a
--- /dev/null
+++ b/modules/assim.sequential/man/SDA_downscale_metrics.Rd
@@ -0,0 +1,25 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/downscale_function.R
+\name{SDA_downscale_metrics}
+\alias{SDA_downscale_metrics}
+\title{Calculate Metrics for Downscaling Results}
+\usage{
+SDA_downscale_metrics(downscale_output, carbon_pool)
+}
+\arguments{
+\item{downscale_output}{List. Output from the SDA_downscale function, containing data, models, maps, and predictions for each ensemble.}
+
+\item{carbon_pool}{Character. Name of the carbon pool used in the downscaling process.}
+}
+\value{
+A list of metrics for each ensemble, where each element contains MAE , MSE ,R_squared ,actual values from testing data and predicted values for the testing data
+}
+\description{
+This function takes the output from the SDA_downscale function and computes various performance metrics for each ensemble. It provides a way to evaluate the accuracy of the downscaling results without modifying the main downscaling function.
+}
+\details{
+This function calculates performance metrics for the downscaling results. It computes Mean Squared Error (MSE), Mean Absolute Error (MAE), and R-squared for each ensemble. The function uses the actual values from the testing data and the predictions generated during the downscaling process.
+}
+\author{
+Sambhav Dixit
+}

From e143b38942f21ef13b7fc1ba2310cfd3f3b000dc Mon Sep 17 00:00:00 2001
From: Harunobu Ishii <harunobuishii547@gmail.com>
Date: Wed, 24 Jul 2024 14:31:46 -0400
Subject: [PATCH 076/155] Name added to CITATION

---
 CITATION.cff | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/CITATION.cff b/CITATION.cff
index e00ef7b29f6..7af92146298 100644
--- a/CITATION.cff
+++ b/CITATION.cff
@@ -120,7 +120,9 @@ authors:
   - given-names: Eric R. Scott
     affiliation: University of Arizona
     orcid: 'https://orcid.org/0000-0002-7430-7879'
-  
+  - given-names: Harunobu Ishii
+    affiliation: Boston University Software & Application Innovation Lab(SAIL)
+
 preferred-citation:
   type: article
   title: Facilitating feedbacks between field measurements and ecosystem models

From 38c9e7a31e6417bdbce5cdf421647a239982a86f Mon Sep 17 00:00:00 2001
From: Sambhav Dixit <94298612+sambhavnoobcoder@users.noreply.github.com>
Date: Fri, 26 Jul 2024 22:54:14 +0530
Subject: [PATCH 077/155] modified namespaces

added keras3:: and removed base:: namespaces
---
 .../assim.sequential/R/downscale_function.R   | 138 +++++++++---------
 1 file changed, 69 insertions(+), 69 deletions(-)

diff --git a/modules/assim.sequential/R/downscale_function.R b/modules/assim.sequential/R/downscale_function.R
index 9ed6be4ef58..30280dab8d9 100644
--- a/modules/assim.sequential/R/downscale_function.R
+++ b/modules/assim.sequential/R/downscale_function.R
@@ -14,52 +14,52 @@
 
 SDA_downscale_preprocess <- function(data_path, coords_path, date, carbon_pool) {
   # Read the input data and site coordinates
-  input_data <- base::readRDS(data_path)
+  input_data <- readRDS(data_path)
   site_coordinates <- readr::read_csv(coords_path)
   
   # Convert input_data names to Date objects
-  input_date_names <- lubridate::ymd(base::names(input_data))
-  base::names(input_data) <- input_date_names
+  input_date_names <- lubridate::ymd(names(input_data))
+  names(input_data) <- input_date_names
   
   # Convert the input date to a Date object
   standard_date <- lubridate::ymd(date)
   
   # Ensure the date exists in the input data
   if (!standard_date %in% input_date_names) {
-    base::stop(base::paste("Date", date, "not found in the input data."))
+    stop(paste("Date", date, "not found in the input data."))
   }
   
   # Extract the carbon data for the specified focus year
-  index <- base::which(input_date_names == standard_date)
+  index <- which(input_date_names == standard_date)
   data <- input_data[[index]]
   
   # Ensure the carbon pool exists in the input data
-  if (!carbon_pool %in% base::names(data)) {
-    base::stop(base::paste("Carbon pool", carbon_pool, "not found in the input data."))
+  if (!carbon_pool %in% names(data)) {
+    stop(paste("Carbon pool", carbon_pool, "not found in the input data."))
   }
   
-  carbon_data <- base::as.data.frame(base::t(data[base::which(base::names(data) == carbon_pool)]))
-  base::names(carbon_data) <- base::paste0("ensemble", base::seq(base::ncol(carbon_data)))
+  carbon_data <- as.data.frame(t(data[which(names(data) == carbon_pool)]))
+  names(carbon_data) <- paste0("ensemble", seq(ncol(carbon_data)))
   
   # Ensure site coordinates have 'lon' and 'lat' columns
-  if (!base::all(c("lon", "lat") %in% base::names(site_coordinates))) {
-    base::stop("Site coordinates must contain 'lon' and 'lat' columns.")
+  if (!all(c("lon", "lat") %in% names(site_coordinates))) {
+    stop("Site coordinates must contain 'lon' and 'lat' columns.")
   }
   
   # Ensure the number of rows in site coordinates matches the number of rows in carbon data
-  if (base::nrow(site_coordinates) != base::nrow(carbon_data)) {
-    base::message("Number of rows in site coordinates does not match the number of rows in carbon data.")
-    if (base::nrow(site_coordinates) > base::nrow(carbon_data)) {
-      base::message("Truncating site coordinates to match carbon data rows.")
-      site_coordinates <- site_coordinates[1:base::nrow(carbon_data), ]
+  if (nrow(site_coordinates) != nrow(carbon_data)) {
+    message("Number of rows in site coordinates does not match the number of rows in carbon data.")
+    if (nrow(site_coordinates) > nrow(carbon_data)) {
+      message("Truncating site coordinates to match carbon data rows.")
+      site_coordinates <- site_coordinates[1:nrow(carbon_data), ]
     } else {
-      base::message("Truncating carbon data to match site coordinates rows.")
-      carbon_data <- carbon_data[1:base::nrow(site_coordinates), ]
+      message("Truncating carbon data to match site coordinates rows.")
+      carbon_data <- carbon_data[1:nrow(site_coordinates), ]
     }
   }
   
-  base::message("Preprocessing completed successfully.")
-  base::return(base::list(input_data = input_data, site_coordinates = site_coordinates, carbon_data = carbon_data))
+  message("Preprocessing completed successfully.")
+  return(list(input_data = input_data, site_coordinates = site_coordinates, carbon_data = carbon_data))
 }
 
 ##' @title SDA Downscale Function
@@ -85,34 +85,34 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ
   site_coordinates <- terra::vect(preprocessed$site_coordinates, geom = c("lon", "lat"), crs = "EPSG:4326")
   
   # Extract predictors from covariates raster using site coordinates
-  predictors <- base::as.data.frame(terra::extract(covariates, site_coordinates, ID = FALSE))
+  predictors <- as.data.frame(terra::extract(covariates, site_coordinates, ID = FALSE))
   
   # Dynamically get covariate names
-  covariate_names <- base::names(predictors)
+  covariate_names <- names(predictors)
   
   # Create a single data frame with all predictors and ensemble data
-  full_data <- base::cbind(carbon_data, predictors)
+  full_data <- cbind(carbon_data, predictors)
   
   # Split the observations into training and testing sets
-  if (!base::is.null(seed)) {
-    base::set.seed(seed)  # Only set seed if provided
+  if (!is.null(seed)) {
+    set.seed(seed)  # Only set seed if provided
   }
-  sample <- base::sample(1:base::nrow(full_data), size = base::round(0.75 * base::nrow(full_data)))
+  sample <- sample(1:nrow(full_data), size = round(0.75 * nrow(full_data)))
   train_data <- full_data[sample, ]
   test_data <- full_data[-sample, ]
   
   # Prepare data for both RF and CNN
-  x_data <- base::as.matrix(full_data[, covariate_names])
-  y_data <- base::as.matrix(carbon_data)
+  x_data <- as.matrix(full_data[, covariate_names])
+  y_data <- as.matrix(carbon_data)
   
   # Calculate scaling parameters from all data
-  scaling_params <- base::list(
-    mean = base::colMeans(x_data),
-    sd = base::apply(x_data, 2, stats::sd)
+  scaling_params <- list(
+    mean = colMeans(x_data),
+    sd = apply(x_data, 2, stats::sd)
   )
   
   # Normalize the data
-  x_data_scaled <- base::scale(x_data, center = scaling_params$mean, scale = scaling_params$sd)
+  x_data_scaled <- scale(x_data, center = scaling_params$mean, scale = scaling_params$sd)
   
   # Split into training and testing sets
   x_train <- x_data_scaled[sample, ]
@@ -121,14 +121,14 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ
   y_test <- y_data[-sample, ]
   
   # Initialize lists for outputs
-  models <- base::list()
-  maps <- base::list()
-  predictions <- base::list()
+  models <- list()
+  maps <- list()
+  predictions <- list()
   
   if (model_type == "rf") {
-    for (i in base::seq_along(carbon_data)) {
-      ensemble_col <- base::paste0("ensemble", i)
-      formula <- stats::as.formula(base::paste(ensemble_col, "~", base::paste(covariate_names, collapse = " + ")))
+    for (i in seq_along(carbon_data)) {
+      ensemble_col <- paste0("ensemble", i)
+      formula <- stats::as.formula(paste(ensemble_col, "~", paste(covariate_names, collapse = " + ")))
       models[[i]] <- randomForest::randomForest(formula,
                                                 data = train_data,
                                                 ntree = 1000,
@@ -140,23 +140,23 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ
       predictions[[i]] <- stats::predict(models[[i]], test_data)
     }
   } else if (model_type == "cnn") {
-    x_train <- array_reshape(x_train, c(base::nrow(x_train), 1, base::ncol(x_train)))
-    x_test <- array_reshape(x_test, c(base::nrow(x_test), 1, base::ncol(x_test)))
+    x_train <- keras3::array_reshape(x_train, c(nrow(x_train), 1, ncol(x_train)))
+    x_test <- keras3::array_reshape(x_test, c(nrow(x_test), 1, ncol(x_test)))
     
-    for (i in base::seq_along(carbon_data)) {
-      model <- keras_model_sequential() |>
-        layer_conv_1d(filters = 64, kernel_size = 1, activation = 'relu', input_shape = c(1, base::length(covariate_names))) |>
-        layer_flatten() |>
-        layer_dense(units = 64, activation = 'relu') |>
-        layer_dense(units = 1)
+    for (i in seq_along(carbon_data)) {
+      model <- keras3::keras_model_sequential() |>
+        keras3::layer_conv_1d(filters = 64, kernel_size = 1, activation = 'relu', input_shape = c(1, length(covariate_names))) |>
+        keras3::layer_flatten() |>
+        keras3::layer_dense(units = 64, activation = 'relu') |>
+        keras3::layer_dense(units = 1)
       
-      model |> compile(
+      model |> keras3::compile(
         loss = 'mean_squared_error',
-        optimizer = optimizer_adam(),
+        optimizer = keras3::optimizer_adam(),
         metrics = c('mean_absolute_error')
       )
       
-      model |> fit(
+      model |> keras3::fit(
         x = x_train,
         y = y_train[, i],
         epochs = 100,
@@ -168,10 +168,10 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ
       models[[i]] <- model
       
       cnn_predict <- function(model, newdata, scaling_params) {
-        newdata <- base::scale(newdata, center = scaling_params$mean, scale = scaling_params$sd)
-        newdata <- array_reshape(newdata, c(base::nrow(newdata), 1, base::ncol(newdata)))
+        newdata <- scale(newdata, center = scaling_params$mean, scale = scaling_params$sd)
+        newdata <- keras3::array_reshape(newdata, c(nrow(newdata), 1, ncol(newdata)))
         predictions <- stats::predict(model, newdata)
-        base::return(base::as.vector(predictions))
+        return(as.vector(predictions))
       }
       
       prediction_rast <- terra::rast(covariates)
@@ -182,12 +182,12 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ
       predictions[[i]] <- cnn_predict(models[[i]], x_data[-sample, ], scaling_params)
     }
   } else {
-    base::stop("Invalid model_type. Please choose either 'rf' for Random Forest or 'cnn' for Convolutional Neural Network.")
+    stop("Invalid model_type. Please choose either 'rf' for Random Forest or 'cnn' for Convolutional Neural Network.")
   }
   
   # Organize the results into a single output list
-  downscale_output <- base::list(
-    data = base::list(training = train_data, testing = test_data),
+  downscale_output <- list(
+    data = list(training = train_data, testing = test_data),
     models = models,
     maps = maps,
     predictions = predictions,
@@ -195,13 +195,13 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ
   )
   
   # Rename each element of the output list with appropriate ensemble numbers
-  for (i in base::seq_along(carbon_data)) {
-    base::names(downscale_output$models)[i] <- base::paste0("ensemble", i)
-    base::names(downscale_output$maps)[i] <- base::paste0("ensemble", i)
-    base::names(downscale_output$predictions)[i] <- base::paste0("ensemble", i)
+  for (i in seq_along(carbon_data)) {
+    names(downscale_output$models)[i] <- paste0("ensemble", i)
+    names(downscale_output$maps)[i] <- paste0("ensemble", i)
+    names(downscale_output$predictions)[i] <- paste0("ensemble", i)
   }
   
-  base::return(downscale_output)
+  return(downscale_output)
 }
 
 ##' @title Calculate Metrics for Downscaling Results
@@ -218,20 +218,20 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ
 ##' @return A list of metrics for each ensemble, where each element contains MAE , MSE ,R_squared ,actual values from testing data and predicted values for the testing data 
 
 SDA_downscale_metrics <- function(downscale_output, carbon_pool) {
-  metrics <- base::list()
+  metrics <- list()
   
-  for (i in 1:base::length(downscale_output$data)) {
-    actual <- downscale_output$data[[i]]$testing[[base::paste0(carbon_pool, "_ens", i)]]
+  for (i in 1:length(downscale_output$data)) {
+    actual <- downscale_output$data[[i]]$testing[[paste0(carbon_pool, "_ens", i)]]
     predicted <- downscale_output$predictions[[i]]
     
-    mse <- base::mean((actual - predicted)^2)
-    mae <- base::mean(base::abs(actual - predicted))
-    r_squared <- 1 - base::sum((actual - predicted)^2) / base::sum((actual - base::mean(actual))^2)
+    mse <- mean((actual - predicted)^2)
+    mae <- mean(abs(actual - predicted))
+    r_squared <- 1 - sum((actual - predicted)^2) / sum((actual - mean(actual))^2)
     
-    metrics[[i]] <- base::list(MSE = mse, MAE = mae, R_squared = r_squared, actual = actual, predicted = predicted)
+    metrics[[i]] <- list(MSE = mse, MAE = mae, R_squared = r_squared, actual = actual, predicted = predicted)
   }
   
-  base::names(metrics) <- base::paste0("ensemble", base::seq_along(metrics))
+  names(metrics) <- paste0("ensemble", seq_along(metrics))
   
-  base::return(metrics)
+  return(metrics)
 }

From 717d5d4ed744ce5c6ae6186d37c321eeb03ddc8f Mon Sep 17 00:00:00 2001
From: Meet Agrawal <meet.m.agrawal@oracle.com>
Date: Sat, 27 Jul 2024 18:35:27 +0530
Subject: [PATCH 078/155] fixed typo

---
 modules/meta.analysis/R/rename_jags_columns.R    | 2 +-
 modules/meta.analysis/man/rename_jags_columns.Rd | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/modules/meta.analysis/R/rename_jags_columns.R b/modules/meta.analysis/R/rename_jags_columns.R
index d18fc5bb453..43d22ef7b0e 100644
--- a/modules/meta.analysis/R/rename_jags_columns.R
+++ b/modules/meta.analysis/R/rename_jags_columns.R
@@ -8,7 +8,7 @@
 #-------------------------------------------------------------------------------
 
 ##-----------------------------------------------------------------------------#
-##' renames the variables within output data frame trait.data
+##' Renames the variables within output data frame trait.data
 ##'
 ##' @param data data frame to with variables to rename
 ##'
diff --git a/modules/meta.analysis/man/rename_jags_columns.Rd b/modules/meta.analysis/man/rename_jags_columns.Rd
index 5a53963c293..d9da830d917 100644
--- a/modules/meta.analysis/man/rename_jags_columns.Rd
+++ b/modules/meta.analysis/man/rename_jags_columns.Rd
@@ -2,7 +2,7 @@
 % Please edit documentation in R/rename_jags_columns.R
 \name{rename_jags_columns}
 \alias{rename_jags_columns}
-\title{renames the variables within output data frame trait.data}
+\title{Renames the variables within output data frame trait.data}
 \usage{
 rename_jags_columns(data)
 }

From 51f0283b88f35cdce98bfe6b37b0d6bb7b28a118 Mon Sep 17 00:00:00 2001
From: Meet Agrawal <meet.m.agrawal@oracle.com>
Date: Sat, 27 Jul 2024 21:33:47 +0530
Subject: [PATCH 079/155] roxygenise

---
 modules/meta.analysis/man/rename_jags_columns.Rd | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/meta.analysis/man/rename_jags_columns.Rd b/modules/meta.analysis/man/rename_jags_columns.Rd
index d9da830d917..7172345230f 100644
--- a/modules/meta.analysis/man/rename_jags_columns.Rd
+++ b/modules/meta.analysis/man/rename_jags_columns.Rd
@@ -10,7 +10,7 @@ rename_jags_columns(data)
 \item{data}{data frame to with variables to rename}
 }
 \description{
-renames the variables within output data frame trait.data
+Renames the variables within output data frame trait.data
 }
 \seealso{
 used with \code{\link[PEcAn.MA]{jagify}};

From 7859206be1b14ba27e986f62cf81cbdfcd9a96b0 Mon Sep 17 00:00:00 2001
From: Sambhav Dixit <94298612+sambhavnoobcoder@users.noreply.github.com>
Date: Sun, 28 Jul 2024 18:54:07 +0530
Subject: [PATCH 080/155] Update NAMESPACE

updated NAMESPACE after keras3 addition to DESCRIPTION
---
 modules/assim.sequential/NAMESPACE | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/modules/assim.sequential/NAMESPACE b/modules/assim.sequential/NAMESPACE
index bb8aa415da4..5a69d8bd9ef 100644
--- a/modules/assim.sequential/NAMESPACE
+++ b/modules/assim.sequential/NAMESPACE
@@ -1,5 +1,6 @@
 # Generated by roxygen2: do not edit by hand
 
+S3method(outlier,detector.boxplot)
 export(Analysis.sda)
 export(Construct.H.multisite)
 export(Construct.R)
@@ -21,7 +22,6 @@ export(SDA_control)
 export(SDA_remote_launcher)
 export(SDA_timeseries_plot)
 export(adj.ens)
-export(aggregate)
 export(alltocs)
 export(alr)
 export(assessParams)
@@ -37,7 +37,6 @@ export(load_data_paleon_sda)
 export(matrix_network)
 export(metSplit)
 export(obs_timestep2timepoint)
-export(outlier.detector.boxplot)
 export(piecew.poly.local)
 export(post.analysis.ggplot)
 export(post.analysis.ggplot.violin)
@@ -56,7 +55,6 @@ export(sda_weights_site)
 export(simple.local)
 export(tobit.model)
 export(tobit2space.model)
-export(tobit_model_censored)
 export(y_star_create)
 import(furrr)
 import(lubridate)

From e8c40ac3b5a8451f3ee63a7cf1983662c4669064 Mon Sep 17 00:00:00 2001
From: Sambhav Dixit <94298612+sambhavnoobcoder@users.noreply.github.com>
Date: Sun, 28 Jul 2024 19:02:56 +0530
Subject: [PATCH 081/155] Update DESCRIPTION with keras3

added keras3 to the DESCRIPTION due to its involvement with the CNN in downscaling function
---
 modules/assim.sequential/DESCRIPTION | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/modules/assim.sequential/DESCRIPTION b/modules/assim.sequential/DESCRIPTION
index 8c9721d2483..9a3857ee803 100644
--- a/modules/assim.sequential/DESCRIPTION
+++ b/modules/assim.sequential/DESCRIPTION
@@ -1,7 +1,8 @@
 Package: PEcAnAssimSequential
 Type: Package
 Title: PEcAn Functions Used for Ecological Forecasts and Reanalysis
-Version: 1.8.0.9000
+Version: 1.7.2
+Date: 2021-10-04
 Author: Mike Dietze
 Maintainer: Mike Dietze <dietze@bu.edu>
 Description: The Predictive Ecosystem Carbon Analyzer (PEcAn) is a scientific
@@ -32,7 +33,6 @@ Imports:
     stringr
 Suggests:
     corrplot,
-    exactextractr,
     ggrepel,
     emdbook,
     glue,
@@ -48,6 +48,7 @@ Suggests:
     plotrix,
     plyr (>= 1.8.4),
     randomForest,
+    keras3,
     raster,
     readr,
     reshape2 (>= 1.4.2),
@@ -64,4 +65,4 @@ Suggests:
 License: BSD_3_clause + file LICENSE
 Copyright: Authors
 Encoding: UTF-8
-RoxygenNote: 7.3.2
+RoxygenNote: 7.3.1

From 53a9caeb3424407ae3187328f84236004fd3ad80 Mon Sep 17 00:00:00 2001
From: Sambhav Dixit <94298612+sambhavnoobcoder@users.noreply.github.com>
Date: Sun, 28 Jul 2024 19:28:27 +0530
Subject: [PATCH 082/155] Update pecan_package_dependencies.csv

ran generate_dependencies.R after adding keras3 and pushed the changed pecan_package_dependencies.csv to the PR as suggested
---
 docker/depends/pecan_package_dependencies.csv | 87 +++++++++----------
 1 file changed, 43 insertions(+), 44 deletions(-)

diff --git a/docker/depends/pecan_package_dependencies.csv b/docker/depends/pecan_package_dependencies.csv
index f8894581945..2b12a43c9c8 100644
--- a/docker/depends/pecan_package_dependencies.csv
+++ b/docker/depends/pecan_package_dependencies.csv
@@ -61,7 +61,6 @@
 "dplyr",">= 1.1.2","base/db","Imports",FALSE
 "ellipse","*","modules/assim.batch","Imports",FALSE
 "emdbook","*","modules/assim.sequential","Suggests",FALSE
-"exactextractr","*","modules/assim.sequential","Suggests",FALSE
 "foreach","*","base/remote","Imports",FALSE
 "foreach","*","modules/data.atmosphere","Suggests",FALSE
 "foreach","*","modules/data.remote","Imports",FALSE
@@ -122,6 +121,7 @@
 "jsonlite","*","models/stics","Imports",FALSE
 "jsonlite","*","modules/data.atmosphere","Imports",FALSE
 "jsonlite","*","modules/data.remote","Suggests",FALSE
+"keras3","*","modules/assim.sequential","Suggests",FALSE
 "knitr",">= 1.42","base/db","Suggests",FALSE
 "knitr",">= 1.42","base/qaqc","Suggests",FALSE
 "knitr",">= 1.42","modules/allometry","Suggests",FALSE
@@ -487,48 +487,48 @@
 "rmarkdown",">= 2.19","modules/assim.batch","Suggests",FALSE
 "rmarkdown",">= 2.19","modules/meta.analysis","Suggests",FALSE
 "rmarkdown",">= 2.19","modules/photosynthesis","Suggests",FALSE
-"roxygen2","== 7.3.2","base/all","Roxygen",FALSE
-"roxygen2","== 7.3.2","base/db","Roxygen",FALSE
-"roxygen2","== 7.3.2","base/logger","Roxygen",FALSE
-"roxygen2","== 7.3.2","base/qaqc","Roxygen",FALSE
-"roxygen2","== 7.3.2","base/remote","Roxygen",FALSE
-"roxygen2","== 7.3.2","base/settings","Roxygen",FALSE
-"roxygen2","== 7.3.2","base/utils","Roxygen",FALSE
-"roxygen2","== 7.3.2","base/visualization","Roxygen",FALSE
-"roxygen2","== 7.3.2","base/workflow","Roxygen",FALSE
-"roxygen2","== 7.3.2","models/basgra","Roxygen",FALSE
-"roxygen2","== 7.3.2","models/biocro","Roxygen",FALSE
-"roxygen2","== 7.3.2","models/cable","Roxygen",FALSE
-"roxygen2","== 7.3.2","models/clm45","Roxygen",FALSE
-"roxygen2","== 7.3.2","models/dalec","Roxygen",FALSE
-"roxygen2","== 7.3.2","models/dvmdostem","Roxygen",FALSE
-"roxygen2","== 7.3.2","models/ed","Roxygen",FALSE
-"roxygen2","== 7.3.2","models/fates","Roxygen",FALSE
-"roxygen2","== 7.3.2","models/gday","Roxygen",FALSE
-"roxygen2","== 7.3.2","models/jules","Roxygen",FALSE
-"roxygen2","== 7.3.2","models/ldndc","Roxygen",FALSE
-"roxygen2","== 7.3.2","models/linkages","Roxygen",FALSE
-"roxygen2","== 7.3.2","models/lpjguess","Roxygen",FALSE
-"roxygen2","== 7.3.2","models/maat","Roxygen",FALSE
-"roxygen2","== 7.3.2","models/maespa","Roxygen",FALSE
-"roxygen2","== 7.3.2","models/preles","Roxygen",FALSE
-"roxygen2","== 7.3.2","models/sibcasa","Roxygen",FALSE
-"roxygen2","== 7.3.2","models/sipnet","Roxygen",FALSE
-"roxygen2","== 7.3.2","models/stics","Roxygen",FALSE
-"roxygen2","== 7.3.2","models/template","Roxygen",FALSE
-"roxygen2","== 7.3.2","modules/allometry","Roxygen",FALSE
-"roxygen2","== 7.3.2","modules/assim.batch","Roxygen",FALSE
-"roxygen2","== 7.3.2","modules/assim.sequential","Roxygen",FALSE
-"roxygen2","== 7.3.2","modules/benchmark","Roxygen",FALSE
-"roxygen2","== 7.3.2","modules/data.atmosphere","Roxygen",FALSE
-"roxygen2","== 7.3.2","modules/data.land","Roxygen",FALSE
-"roxygen2","== 7.3.2","modules/data.remote","Roxygen",FALSE
-"roxygen2","== 7.3.2","modules/emulator","Roxygen",FALSE
-"roxygen2","== 7.3.2","modules/meta.analysis","Roxygen",FALSE
-"roxygen2","== 7.3.2","modules/photosynthesis","Roxygen",FALSE
-"roxygen2","== 7.3.2","modules/priors","Roxygen",FALSE
-"roxygen2","== 7.3.2","modules/rtm","Roxygen",FALSE
-"roxygen2","== 7.3.2","modules/uncertainty","Roxygen",FALSE
+"roxygen2","== 7.3.1","base/all","Roxygen",FALSE
+"roxygen2","== 7.3.1","base/db","Roxygen",FALSE
+"roxygen2","== 7.3.1","base/logger","Roxygen",FALSE
+"roxygen2","== 7.3.1","base/qaqc","Roxygen",FALSE
+"roxygen2","== 7.3.1","base/remote","Roxygen",FALSE
+"roxygen2","== 7.3.1","base/settings","Roxygen",FALSE
+"roxygen2","== 7.3.1","base/utils","Roxygen",FALSE
+"roxygen2","== 7.3.1","base/visualization","Roxygen",FALSE
+"roxygen2","== 7.3.1","base/workflow","Roxygen",FALSE
+"roxygen2","== 7.3.1","models/basgra","Roxygen",FALSE
+"roxygen2","== 7.3.1","models/biocro","Roxygen",FALSE
+"roxygen2","== 7.3.1","models/cable","Roxygen",FALSE
+"roxygen2","== 7.3.1","models/clm45","Roxygen",FALSE
+"roxygen2","== 7.3.1","models/dalec","Roxygen",FALSE
+"roxygen2","== 7.3.1","models/dvmdostem","Roxygen",FALSE
+"roxygen2","== 7.3.1","models/ed","Roxygen",FALSE
+"roxygen2","== 7.3.1","models/fates","Roxygen",FALSE
+"roxygen2","== 7.3.1","models/gday","Roxygen",FALSE
+"roxygen2","== 7.3.1","models/jules","Roxygen",FALSE
+"roxygen2","== 7.3.1","models/ldndc","Roxygen",FALSE
+"roxygen2","== 7.3.1","models/linkages","Roxygen",FALSE
+"roxygen2","== 7.3.1","models/lpjguess","Roxygen",FALSE
+"roxygen2","== 7.3.1","models/maat","Roxygen",FALSE
+"roxygen2","== 7.3.1","models/maespa","Roxygen",FALSE
+"roxygen2","== 7.3.1","models/preles","Roxygen",FALSE
+"roxygen2","== 7.3.1","models/sibcasa","Roxygen",FALSE
+"roxygen2","== 7.3.1","models/sipnet","Roxygen",FALSE
+"roxygen2","== 7.3.1","models/stics","Roxygen",FALSE
+"roxygen2","== 7.3.1","models/template","Roxygen",FALSE
+"roxygen2","== 7.3.1","modules/allometry","Roxygen",FALSE
+"roxygen2","== 7.3.1","modules/assim.batch","Roxygen",FALSE
+"roxygen2","== 7.3.1","modules/assim.sequential","Roxygen",FALSE
+"roxygen2","== 7.3.1","modules/benchmark","Roxygen",FALSE
+"roxygen2","== 7.3.1","modules/data.atmosphere","Roxygen",FALSE
+"roxygen2","== 7.3.1","modules/data.land","Roxygen",FALSE
+"roxygen2","== 7.3.1","modules/data.remote","Roxygen",FALSE
+"roxygen2","== 7.3.1","modules/emulator","Roxygen",FALSE
+"roxygen2","== 7.3.1","modules/meta.analysis","Roxygen",FALSE
+"roxygen2","== 7.3.1","modules/photosynthesis","Roxygen",FALSE
+"roxygen2","== 7.3.1","modules/priors","Roxygen",FALSE
+"roxygen2","== 7.3.1","modules/rtm","Roxygen",FALSE
+"roxygen2","== 7.3.1","modules/uncertainty","Roxygen",FALSE
 "RPostgres","*","base/db","Suggests",FALSE
 "RPostgreSQL","*","base/db","Suggests",FALSE
 "RPostgreSQL","*","models/biocro","Suggests",FALSE
@@ -609,7 +609,6 @@
 "testthat",">= 3.0.4","base/qaqc","Suggests",FALSE
 "tibble","*","base/db","Imports",FALSE
 "tibble","*","models/ed","Imports",FALSE
-"tibble","*","models/fates","Imports",FALSE
 "tibble","*","models/lpjguess","Imports",FALSE
 "tibble","*","modules/data.atmosphere","Imports",FALSE
 "tibble","*","modules/data.remote","Suggests",FALSE

From b9cc4fbf8d3c7172cc0a755c8f82ef0cfd30a07c Mon Sep 17 00:00:00 2001
From: Sambhav Dixit <94298612+sambhavnoobcoder@users.noreply.github.com>
Date: Sun, 28 Jul 2024 22:59:48 +0530
Subject: [PATCH 083/155] Update pecan_package_dependencies.csv for some
 changes

attested to some changes in the pecan dependencies, rechecked dependencies and committed accordingly.
---
 docker/depends/pecan_package_dependencies.csv | 86 ++++++++++---------
 1 file changed, 44 insertions(+), 42 deletions(-)

diff --git a/docker/depends/pecan_package_dependencies.csv b/docker/depends/pecan_package_dependencies.csv
index 2b12a43c9c8..0d677a5a02c 100644
--- a/docker/depends/pecan_package_dependencies.csv
+++ b/docker/depends/pecan_package_dependencies.csv
@@ -61,6 +61,7 @@
 "dplyr",">= 1.1.2","base/db","Imports",FALSE
 "ellipse","*","modules/assim.batch","Imports",FALSE
 "emdbook","*","modules/assim.sequential","Suggests",FALSE
+"exactextractr","*","modules/assim.sequential","Suggests",FALSE
 "foreach","*","base/remote","Imports",FALSE
 "foreach","*","modules/data.atmosphere","Suggests",FALSE
 "foreach","*","modules/data.remote","Imports",FALSE
@@ -487,48 +488,48 @@
 "rmarkdown",">= 2.19","modules/assim.batch","Suggests",FALSE
 "rmarkdown",">= 2.19","modules/meta.analysis","Suggests",FALSE
 "rmarkdown",">= 2.19","modules/photosynthesis","Suggests",FALSE
-"roxygen2","== 7.3.1","base/all","Roxygen",FALSE
-"roxygen2","== 7.3.1","base/db","Roxygen",FALSE
-"roxygen2","== 7.3.1","base/logger","Roxygen",FALSE
-"roxygen2","== 7.3.1","base/qaqc","Roxygen",FALSE
-"roxygen2","== 7.3.1","base/remote","Roxygen",FALSE
-"roxygen2","== 7.3.1","base/settings","Roxygen",FALSE
-"roxygen2","== 7.3.1","base/utils","Roxygen",FALSE
-"roxygen2","== 7.3.1","base/visualization","Roxygen",FALSE
-"roxygen2","== 7.3.1","base/workflow","Roxygen",FALSE
-"roxygen2","== 7.3.1","models/basgra","Roxygen",FALSE
-"roxygen2","== 7.3.1","models/biocro","Roxygen",FALSE
-"roxygen2","== 7.3.1","models/cable","Roxygen",FALSE
-"roxygen2","== 7.3.1","models/clm45","Roxygen",FALSE
-"roxygen2","== 7.3.1","models/dalec","Roxygen",FALSE
-"roxygen2","== 7.3.1","models/dvmdostem","Roxygen",FALSE
-"roxygen2","== 7.3.1","models/ed","Roxygen",FALSE
-"roxygen2","== 7.3.1","models/fates","Roxygen",FALSE
-"roxygen2","== 7.3.1","models/gday","Roxygen",FALSE
-"roxygen2","== 7.3.1","models/jules","Roxygen",FALSE
-"roxygen2","== 7.3.1","models/ldndc","Roxygen",FALSE
-"roxygen2","== 7.3.1","models/linkages","Roxygen",FALSE
-"roxygen2","== 7.3.1","models/lpjguess","Roxygen",FALSE
-"roxygen2","== 7.3.1","models/maat","Roxygen",FALSE
-"roxygen2","== 7.3.1","models/maespa","Roxygen",FALSE
-"roxygen2","== 7.3.1","models/preles","Roxygen",FALSE
-"roxygen2","== 7.3.1","models/sibcasa","Roxygen",FALSE
-"roxygen2","== 7.3.1","models/sipnet","Roxygen",FALSE
-"roxygen2","== 7.3.1","models/stics","Roxygen",FALSE
-"roxygen2","== 7.3.1","models/template","Roxygen",FALSE
-"roxygen2","== 7.3.1","modules/allometry","Roxygen",FALSE
-"roxygen2","== 7.3.1","modules/assim.batch","Roxygen",FALSE
-"roxygen2","== 7.3.1","modules/assim.sequential","Roxygen",FALSE
-"roxygen2","== 7.3.1","modules/benchmark","Roxygen",FALSE
-"roxygen2","== 7.3.1","modules/data.atmosphere","Roxygen",FALSE
-"roxygen2","== 7.3.1","modules/data.land","Roxygen",FALSE
-"roxygen2","== 7.3.1","modules/data.remote","Roxygen",FALSE
-"roxygen2","== 7.3.1","modules/emulator","Roxygen",FALSE
-"roxygen2","== 7.3.1","modules/meta.analysis","Roxygen",FALSE
-"roxygen2","== 7.3.1","modules/photosynthesis","Roxygen",FALSE
-"roxygen2","== 7.3.1","modules/priors","Roxygen",FALSE
-"roxygen2","== 7.3.1","modules/rtm","Roxygen",FALSE
-"roxygen2","== 7.3.1","modules/uncertainty","Roxygen",FALSE
+"roxygen2","== 7.3.2","base/all","Roxygen",FALSE
+"roxygen2","== 7.3.2","base/db","Roxygen",FALSE
+"roxygen2","== 7.3.2","base/logger","Roxygen",FALSE
+"roxygen2","== 7.3.2","base/qaqc","Roxygen",FALSE
+"roxygen2","== 7.3.2","base/remote","Roxygen",FALSE
+"roxygen2","== 7.3.2","base/settings","Roxygen",FALSE
+"roxygen2","== 7.3.2","base/utils","Roxygen",FALSE
+"roxygen2","== 7.3.2","base/visualization","Roxygen",FALSE
+"roxygen2","== 7.3.2","base/workflow","Roxygen",FALSE
+"roxygen2","== 7.3.2","models/basgra","Roxygen",FALSE
+"roxygen2","== 7.3.2","models/biocro","Roxygen",FALSE
+"roxygen2","== 7.3.2","models/cable","Roxygen",FALSE
+"roxygen2","== 7.3.2","models/clm45","Roxygen",FALSE
+"roxygen2","== 7.3.2","models/dalec","Roxygen",FALSE
+"roxygen2","== 7.3.2","models/dvmdostem","Roxygen",FALSE
+"roxygen2","== 7.3.2","models/ed","Roxygen",FALSE
+"roxygen2","== 7.3.2","models/fates","Roxygen",FALSE
+"roxygen2","== 7.3.2","models/gday","Roxygen",FALSE
+"roxygen2","== 7.3.2","models/jules","Roxygen",FALSE
+"roxygen2","== 7.3.2","models/ldndc","Roxygen",FALSE
+"roxygen2","== 7.3.2","models/linkages","Roxygen",FALSE
+"roxygen2","== 7.3.2","models/lpjguess","Roxygen",FALSE
+"roxygen2","== 7.3.2","models/maat","Roxygen",FALSE
+"roxygen2","== 7.3.2","models/maespa","Roxygen",FALSE
+"roxygen2","== 7.3.2","models/preles","Roxygen",FALSE
+"roxygen2","== 7.3.2","models/sibcasa","Roxygen",FALSE
+"roxygen2","== 7.3.2","models/sipnet","Roxygen",FALSE
+"roxygen2","== 7.3.2","models/stics","Roxygen",FALSE
+"roxygen2","== 7.3.2","models/template","Roxygen",FALSE
+"roxygen2","== 7.3.2","modules/allometry","Roxygen",FALSE
+"roxygen2","== 7.3.2","modules/assim.batch","Roxygen",FALSE
+"roxygen2","== 7.3.2","modules/assim.sequential","Roxygen",FALSE
+"roxygen2","== 7.3.2","modules/benchmark","Roxygen",FALSE
+"roxygen2","== 7.3.2","modules/data.atmosphere","Roxygen",FALSE
+"roxygen2","== 7.3.2","modules/data.land","Roxygen",FALSE
+"roxygen2","== 7.3.2","modules/data.remote","Roxygen",FALSE
+"roxygen2","== 7.3.2","modules/emulator","Roxygen",FALSE
+"roxygen2","== 7.3.2","modules/meta.analysis","Roxygen",FALSE
+"roxygen2","== 7.3.2","modules/photosynthesis","Roxygen",FALSE
+"roxygen2","== 7.3.2","modules/priors","Roxygen",FALSE
+"roxygen2","== 7.3.2","modules/rtm","Roxygen",FALSE
+"roxygen2","== 7.3.2","modules/uncertainty","Roxygen",FALSE
 "RPostgres","*","base/db","Suggests",FALSE
 "RPostgreSQL","*","base/db","Suggests",FALSE
 "RPostgreSQL","*","models/biocro","Suggests",FALSE
@@ -609,6 +610,7 @@
 "testthat",">= 3.0.4","base/qaqc","Suggests",FALSE
 "tibble","*","base/db","Imports",FALSE
 "tibble","*","models/ed","Imports",FALSE
+"tibble","*","models/fates","Imports",FALSE
 "tibble","*","models/lpjguess","Imports",FALSE
 "tibble","*","modules/data.atmosphere","Imports",FALSE
 "tibble","*","modules/data.remote","Suggests",FALSE

From d659567511f32205ef6b31d7ef287aba5fe99a02 Mon Sep 17 00:00:00 2001
From: Sambhav Dixit <94298612+sambhavnoobcoder@users.noreply.github.com>
Date: Mon, 29 Jul 2024 01:02:18 +0530
Subject: [PATCH 084/155] Update NAMESPACE

Updated NAMESPACE with some changes in dependencies.
---
 modules/assim.sequential/NAMESPACE | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/modules/assim.sequential/NAMESPACE b/modules/assim.sequential/NAMESPACE
index 5a69d8bd9ef..6ba3a777359 100644
--- a/modules/assim.sequential/NAMESPACE
+++ b/modules/assim.sequential/NAMESPACE
@@ -22,6 +22,7 @@ export(SDA_control)
 export(SDA_remote_launcher)
 export(SDA_timeseries_plot)
 export(adj.ens)
+export(aggregate)
 export(alltocs)
 export(alr)
 export(assessParams)
@@ -37,6 +38,7 @@ export(load_data_paleon_sda)
 export(matrix_network)
 export(metSplit)
 export(obs_timestep2timepoint)
+export(outlier.detector.boxplot)
 export(piecew.poly.local)
 export(post.analysis.ggplot)
 export(post.analysis.ggplot.violin)
@@ -55,6 +57,7 @@ export(sda_weights_site)
 export(simple.local)
 export(tobit.model)
 export(tobit2space.model)
+export(tobit_model_censored)
 export(y_star_create)
 import(furrr)
 import(lubridate)

From aacc890ee39615b58ec257c0f5af4a21b4e5c53e Mon Sep 17 00:00:00 2001
From: Sambhav Dixit <94298612+sambhavnoobcoder@users.noreply.github.com>
Date: Mon, 29 Jul 2024 03:39:49 +0530
Subject: [PATCH 085/155] Reverting NAMESPACE

Since the code passed 11 of 13 GitHub Actions checks with the old NAMESPACE and pecan_package_dependencies, reverting to them one by one may help in figuring out the error here. Starting by reverting to the existing version of NAMESPACE.
---
 modules/assim.sequential/NAMESPACE | 1 -
 1 file changed, 1 deletion(-)

diff --git a/modules/assim.sequential/NAMESPACE b/modules/assim.sequential/NAMESPACE
index 6ba3a777359..bb8aa415da4 100644
--- a/modules/assim.sequential/NAMESPACE
+++ b/modules/assim.sequential/NAMESPACE
@@ -1,6 +1,5 @@
 # Generated by roxygen2: do not edit by hand
 
-S3method(outlier,detector.boxplot)
 export(Analysis.sda)
 export(Construct.H.multisite)
 export(Construct.R)

From 3f15632edc75cd0fa2d5a5fd1ed51c2311a6af22 Mon Sep 17 00:00:00 2001
From: Chris Black <chris@ckblack.org>
Date: Tue, 16 Jul 2024 07:00:16 -0700
Subject: [PATCH 086/155] fixes for new check errors in R >= 4.4.0

---
 base/db/tests/Rcheck_reference.log            |   3 +-
 base/qaqc/tests/Rcheck_reference.log          |   1 +
 base/remote/tests/Rcheck_reference.log        |   3 +-
 base/settings/R/check.all.settings.R          |   5 +-
 base/settings/man/check.workflow.settings.Rd  |   2 +
 base/settings/tests/Rcheck_reference.log      |  36 +-----
 base/utils/tests/Rcheck_reference.log         |   3 +-
 base/visualization/DESCRIPTION                |   2 +
 base/visualization/R/plots.R                  |   2 +-
 base/visualization/man/theme_border.Rd        |   2 +-
 base/visualization/tests/Rcheck_reference.log |   3 +-
 base/visualization/vignettes/usmap.Rmd        |  20 ++-
 base/workflow/tests/Rcheck_reference.log      |   2 +-
 docker/depends/pecan_package_dependencies.csv |   1 +
 modules/allometry/R/AllomAve.R                |  29 +++--
 modules/allometry/R/allom.BayesFit.R          |  46 ++++---
 modules/allometry/R/allom.predict.R           |  25 ++--
 modules/allometry/R/query.allom.data.R        |  25 ++--
 modules/allometry/R/read.allom.data.R         |  12 +-
 modules/allometry/man/AllomAve.Rd             |  16 +--
 modules/allometry/man/allom.BayesFit.Rd       |  30 ++---
 modules/allometry/man/allom.predict.Rd        |   8 +-
 modules/allometry/man/load.allom.Rd           |   2 +-
 modules/allometry/man/query.allom.data.Rd     |   6 +-
 modules/allometry/man/read.allom.data.Rd      |   2 +-
 modules/allometry/tests/Rcheck_reference.log  |   3 +-
 .../assim.batch/tests/Rcheck_reference.log    |   3 +-
 modules/assim.sequential/R/Analysis_sda.R     |   1 +
 modules/assim.sequential/R/GEF_Helper.R       |   3 +-
 .../R/Multi_Site_Constructors.R               |   3 +
 modules/assim.sequential/R/Remote_helpers.R   |   1 +
 modules/assim.sequential/R/hop_test.R         |   1 +
 .../assim.sequential/R/load_data_paleon_sda.R |   2 +-
 modules/assim.sequential/R/sda.enkf.R         |  14 +--
 .../assim.sequential/R/sda.enkf_MultiSite.R   |  19 +--
 .../assim.sequential/R/sda.enkf_refactored.R  |  38 ++++--
 modules/assim.sequential/R/sda_plotting.R     |   3 +
 modules/assim.sequential/man/Contruct.Pf.Rd   |   6 +
 modules/assim.sequential/man/GEF.Rd           |   2 +
 .../man/SDA_remote_launcher.Rd                |   2 +
 modules/assim.sequential/man/hop_test.Rd      |   2 +
 .../man/interactive.plotting.sda.Rd           |   6 +
 modules/assim.sequential/man/sda.enkf.Rd      |  51 ++++----
 .../man/sda.enkf.multisite.Rd                 |  14 ++-
 .../man/tobit_model_censored.Rd               |   5 +
 .../tests/Rcheck_reference.log                |  10 +-
 modules/benchmark/tests/Rcheck_reference.log  |   3 +-
 modules/data.atmosphere/DESCRIPTION           |   2 +
 modules/data.atmosphere/R/closest_xy.R        |   7 +-
 .../data.atmosphere/R/download.Ameriflux.R    |   1 +
 .../data.atmosphere/R/download.AmerifluxLBL.R |   1 +
 modules/data.atmosphere/R/download.ERA5.R     |   2 +
 .../data.atmosphere/R/download.Fluxnet2015.R  |   2 +
 .../R/download.FluxnetLaThuile.R              |   1 +
 modules/data.atmosphere/R/download.GFDL.R     |   2 +
 .../data.atmosphere/R/download.NARR_site.R    |   5 +-
 modules/data.atmosphere/R/download.NEONmet.R  |   2 +
 modules/data.atmosphere/R/download.PalEON.R   |   4 +-
 modules/data.atmosphere/R/extract.nc.R        |   2 +
 modules/data.atmosphere/R/extract_ERA5.R      |   9 +-
 modules/data.atmosphere/R/lightME.R           |   2 +-
 .../data.atmosphere/R/merge.met.variable.R    |  15 ++-
 modules/data.atmosphere/R/met.process.R       |   2 +
 modules/data.atmosphere/R/met2CF.ALMA.R       |   6 +
 modules/data.atmosphere/R/met2CF.Ameriflux.R  |   1 +
 .../data.atmosphere/R/met2CF.AmerifluxLBL.R   |   1 +
 .../data.atmosphere/R/metgapfill.NOAA_GEFS.R  |  34 +++---
 modules/data.atmosphere/R/metgapfill.R        |   2 +
 modules/data.atmosphere/R/nc_merge.R          |   2 +
 modules/data.atmosphere/R/permute.nc.R        |   1 +
 modules/data.atmosphere/R/split_wind.R        |   6 +-
 .../R/tdm_generate_subdaily_models.R          |  80 ++++++------
 .../data.atmosphere/R/tdm_lm_ensemble_sims.R  |   1 +
 modules/data.atmosphere/R/tdm_model_train.R   |   3 +
 .../R/tdm_predict_subdaily_met.R              |   2 +
 .../R/tdm_temporal_downscale_functions.R      |   2 +
 modules/data.atmosphere/man/closest_xy.Rd     |  11 +-
 modules/data.atmosphere/man/daygroup.Rd       |  11 --
 .../data.atmosphere/man/download.Ameriflux.Rd |   2 +
 .../man/download.AmerifluxLBL.Rd              |   2 +
 .../data.atmosphere/man/download.ERA5.old.Rd  |  30 ++---
 .../man/download.Fluxnet2015.Rd               |   4 +
 .../man/download.FluxnetLaThuile.Rd           |   2 +
 modules/data.atmosphere/man/download.GFDL.Rd  |   2 +
 .../data.atmosphere/man/download.NARR_site.Rd |   5 +
 .../data.atmosphere/man/download.NEONmet.Rd   |   2 +
 .../data.atmosphere/man/extract.nc.ERA5.Rd    |   9 +-
 modules/data.atmosphere/man/extract.nc.Rd     |   2 +
 .../man/gen.subdaily.models.Rd                |   9 +-
 modules/data.atmosphere/man/lightME.Rd        |   2 +-
 .../data.atmosphere/man/lm_ensemble_sims.Rd   |   2 +
 .../data.atmosphere/man/merge_met_variable.Rd |   9 +-
 modules/data.atmosphere/man/met.process.Rd    |   3 +
 modules/data.atmosphere/man/met2CF.ALMA.Rd    |   2 +
 .../data.atmosphere/man/met2CF.Ameriflux.Rd   |   2 +
 .../man/met2CF.AmerifluxLBL.Rd                |   2 +
 modules/data.atmosphere/man/met2CF.PalEON.Rd  |   6 +
 .../man/met2CF.PalEONregional.Rd              |   4 +
 .../man/metgapfill.NOAA_GEFS.Rd               |  14 +--
 modules/data.atmosphere/man/metgapfill.Rd     |   2 +
 modules/data.atmosphere/man/model.train.Rd    |   4 +
 modules/data.atmosphere/man/nc.merge.Rd       |   2 +
 modules/data.atmosphere/man/permute.nc.Rd     |   2 +
 .../man/predict_subdaily_met.Rd               |   2 +
 modules/data.atmosphere/man/split_wind.Rd     |   7 +-
 .../man/temporal.downscale.functions.Rd       |   2 +
 .../tests/Rcheck_reference.log                | 114 ++----------------
 .../vignettes/ameriflux_demo.Rmd              |  19 +--
 .../vignettes/cfmet_downscaling.Rmd           |  16 ++-
 .../vignettes/compare_narr_cruncep_met.Rmd    |  48 +++++---
 .../vignettes/tdm_downscaling.Rmd             |  17 ++-
 111 files changed, 560 insertions(+), 490 deletions(-)
 delete mode 100644 modules/data.atmosphere/man/daygroup.Rd

diff --git a/base/db/tests/Rcheck_reference.log b/base/db/tests/Rcheck_reference.log
index cbc3757f683..87ade55021c 100644
--- a/base/db/tests/Rcheck_reference.log
+++ b/base/db/tests/Rcheck_reference.log
@@ -62,7 +62,8 @@ The Date field is over a month old.
 * checking package directory ... OK
 * checking for future file timestamps ... OK
 * checking ‘build’ directory ... OK
-* checking DESCRIPTION meta-information ... OK
+* checking DESCRIPTION meta-information ... NOTE
+License stub is invalid DCF.
 * checking top-level files ... OK
 * checking for left-over files ... OK
 * checking index information ... OK
diff --git a/base/qaqc/tests/Rcheck_reference.log b/base/qaqc/tests/Rcheck_reference.log
index dcbf435c84f..593c738f659 100644
--- a/base/qaqc/tests/Rcheck_reference.log
+++ b/base/qaqc/tests/Rcheck_reference.log
@@ -24,6 +24,7 @@ Malformed Description field: should contain one or more complete sentences.
 Authors@R field gives no person with name and roles.
 Authors@R field gives no person with maintainer role, valid email
 address and non-empty name.
+License stub is invalid DCF.
 * checking top-level files ... NOTE
 Non-standard file/directory found at top level:
   ‘README.Rmd’
diff --git a/base/remote/tests/Rcheck_reference.log b/base/remote/tests/Rcheck_reference.log
index 71001fb962e..ca6f1d67b77 100644
--- a/base/remote/tests/Rcheck_reference.log
+++ b/base/remote/tests/Rcheck_reference.log
@@ -19,7 +19,8 @@
 * checking whether package ‘PEcAn.remote’ can be installed ... OK
 * checking installed package size ... OK
 * checking package directory ... OK
-* checking DESCRIPTION meta-information ... OK
+* checking DESCRIPTION meta-information ... NOTE
+License stub is invalid DCF.
 * checking top-level files ... OK
 * checking for left-over files ... OK
 * checking index information ... OK
diff --git a/base/settings/R/check.all.settings.R b/base/settings/R/check.all.settings.R
index c7b665addca..6b6a20b3998 100644
--- a/base/settings/R/check.all.settings.R
+++ b/base/settings/R/check.all.settings.R
@@ -936,9 +936,10 @@ check.model.settings <- function(settings, dbcon = NULL) {
   return(settings)
 }
 
-#' @title Check Workflow Settings
+#' Check Workflow Settings
 #' @param settings settings file
-#' @export check.workflow.settings
+#' @param dbcon database connection
+#' @export
 check.workflow.settings <- function(settings, dbcon = NULL) {
   # check for workflow defaults
   fixoutdir <- FALSE
diff --git a/base/settings/man/check.workflow.settings.Rd b/base/settings/man/check.workflow.settings.Rd
index 00416872adf..edf68041661 100644
--- a/base/settings/man/check.workflow.settings.Rd
+++ b/base/settings/man/check.workflow.settings.Rd
@@ -8,6 +8,8 @@ check.workflow.settings(settings, dbcon = NULL)
 }
 \arguments{
 \item{settings}{settings file}
+
+\item{dbcon}{database connection}
 }
 \description{
 Check Workflow Settings
diff --git a/base/settings/tests/Rcheck_reference.log b/base/settings/tests/Rcheck_reference.log
index e981f489931..8dcb48578fa 100644
--- a/base/settings/tests/Rcheck_reference.log
+++ b/base/settings/tests/Rcheck_reference.log
@@ -73,7 +73,8 @@ The Date field is over a month old.
 * checking installed package size ... OK
 * checking package directory ... OK
 * checking for future file timestamps ... OK
-* checking DESCRIPTION meta-information ... OK
+* checking DESCRIPTION meta-information ... NOTE
+License stub is invalid DCF.
 * checking top-level files ... NOTE
 Non-standard file/directory found at top level:
   ‘examples’
@@ -140,36 +141,7 @@ All user-level objects in a package should have documentation entries.
 See chapter ‘Writing R documentation files’ in the ‘Writing R
 Extensions’ manual.
 * checking for code/documentation mismatches ... OK
-* checking Rd \usage sections ... WARNING
-Undocumented arguments in documentation object 'addSecrets'
-  ‘force’
-
-Undocumented arguments in documentation object 'check.model.settings'
-  ‘dbcon’
-
-Undocumented arguments in documentation object 'check.run.settings'
-  ‘dbcon’
-
-Undocumented arguments in documentation object 'check.settings'
-  ‘force’
-
-Undocumented arguments in documentation object 'check.workflow.settings'
-  ‘dbcon’
-
-Undocumented arguments in documentation object 'clean.settings'
-  ‘write’
-
-Undocumented arguments in documentation object 'fix.deprecated.settings'
-  ‘force’
-
-Undocumented arguments in documentation object 'update.settings'
-  ‘force’
-
-Functions with \usage entries need to have the appropriate \alias
-entries, and all their arguments documented.
-The \usage entries must correspond to syntactically valid R code.
-See chapter ‘Writing R documentation files’ in the ‘Writing R
-Extensions’ manual.
+* checking Rd \usage sections ... OK
 * checking Rd contents ... OK
 * checking for unstated dependencies in examples ... OK
 * checking examples ... OK
@@ -179,4 +151,4 @@ Extensions’ manual.
 * checking for detritus in the temp directory ... OK
 * DONE
 
-Status: 5 WARNINGs, 3 NOTEs
+Status: 3 WARNINGs, 2 NOTEs
diff --git a/base/utils/tests/Rcheck_reference.log b/base/utils/tests/Rcheck_reference.log
index 087e10096eb..ac9acddcb22 100644
--- a/base/utils/tests/Rcheck_reference.log
+++ b/base/utils/tests/Rcheck_reference.log
@@ -71,7 +71,8 @@ The Date field is over a month old.
 * checking installed package size ... OK
 * checking package directory ... OK
 * checking for future file timestamps ... OK
-* checking DESCRIPTION meta-information ... OK
+* checking DESCRIPTION meta-information ... NOTE
+License stub is invalid DCF.
 * checking top-level files ... OK
 * checking for left-over files ... OK
 * checking index information ... OK
diff --git a/base/visualization/DESCRIPTION b/base/visualization/DESCRIPTION
index 43704b5fb15..c7f735f435f 100644
--- a/base/visualization/DESCRIPTION
+++ b/base/visualization/DESCRIPTION
@@ -38,6 +38,7 @@ Imports:
     stringr(>= 1.1.0)
 Suggests:
     grid,
+    knitr,
     mockery,
     png,
     raster,
@@ -49,5 +50,6 @@ Copyright: Authors
 LazyLoad: yes
 LazyData: FALSE
 Encoding: UTF-8
+VignetteBuilder: knitr
 RoxygenNote: 7.3.2
 Roxygen: list(markdown = TRUE)
diff --git a/base/visualization/R/plots.R b/base/visualization/R/plots.R
index 3f1395305bc..85b6534142f 100644
--- a/base/visualization/R/plots.R
+++ b/base/visualization/R/plots.R
@@ -263,7 +263,7 @@ plot_data <- function(trait.data, base.plot = NULL, ymax) {
 ##' 
 ##' @return adds borders to ggplot as a side effect
 ##' @author Rudolf Cardinal
-##' @author \url{ggplot2 google group}{https://groups.google.com/forum/?fromgroups#!topic/ggplot2/-ZjRE2OL8lE}
+##' @author [ggplot2 google group](https://groups.google.com/forum/?fromgroups#!topic/ggplot2/-ZjRE2OL8lE)
 ##' @examples
 ##' \dontrun{
 ##' df = data.frame( x=c(1,2,3), y=c(4,5,6) )
diff --git a/base/visualization/man/theme_border.Rd b/base/visualization/man/theme_border.Rd
index cc77306dc9b..a003e9ffea4 100644
--- a/base/visualization/man/theme_border.Rd
+++ b/base/visualization/man/theme_border.Rd
@@ -47,5 +47,5 @@ ggplot(data=df, aes(x=x, y=y)) + geom_point() + theme_bw() +
 \author{
 Rudolf Cardinal
 
-\url{ggplot2 google group}{https://groups.google.com/forum/?fromgroups#!topic/ggplot2/-ZjRE2OL8lE}
+\href{https://groups.google.com/forum/?fromgroups#!topic/ggplot2/-ZjRE2OL8lE}{ggplot2 google group}
 }
diff --git a/base/visualization/tests/Rcheck_reference.log b/base/visualization/tests/Rcheck_reference.log
index a578ea6cf0c..0c65006916e 100644
--- a/base/visualization/tests/Rcheck_reference.log
+++ b/base/visualization/tests/Rcheck_reference.log
@@ -71,7 +71,8 @@ The Date field is over a month old.
 * checking installed package size ... OK
 * checking package directory ... OK
 * checking for future file timestamps ... OK
-* checking DESCRIPTION meta-information ... OK
+* checking DESCRIPTION meta-information ... NOTE
+License stub is invalid DCF.
 * checking top-level files ... OK
 * checking for left-over files ... OK
 * checking index information ... OK
diff --git a/base/visualization/vignettes/usmap.Rmd b/base/visualization/vignettes/usmap.Rmd
index dff4e2b9ab4..c7d9a9ae0b3 100644
--- a/base/visualization/vignettes/usmap.Rmd
+++ b/base/visualization/vignettes/usmap.Rmd
@@ -1,7 +1,19 @@
+---
+title: "Maps"
+output: html_vignette
+vignette: >
+   %\VignetteIndexEntry{Maps}
+   %\VignetteEngine{knitr::rmarkdown}
+---
+
+
+
 Map
 ========================================================
 
-```{r}
+(all code chunks are disabled because vignette build was throwing errors. TODO: debug and re-enable.)
+
+```{r,eval=FALSE}
 require(raster)
 require(sp)
 require(ggplot2)
@@ -20,7 +32,7 @@ spplot(spdf)
 
 ### Plot all maps for BETYdb
 
-```{r}
+```{r,eval=FALSE}
 files <- dir("~/dev/bety/local/modelout", pattern="grid.csv", full.names=TRUE)
 yieldfiles <- files[!grepl("evapotranspiration", files)]
 etfiles <- files[grepl("evapotranspiration", files)]
@@ -42,7 +54,7 @@ for(file in etfiles){
 ```
 
 ### Misc additional code
-```{r}
+```{r,eval=FALSE}
 # Make an evenly spaced raster, the same extent as original data
 e <- extent( spdf )
 
@@ -63,7 +75,7 @@ ggplot( NULL ) + geom_raster( data = rdf , aes( x , y , fill = layer ) )
 
 ```
 
-```{r}
+```{r,eval=FALSE}
 # from http://gis.stackexchange.com/a/20052/3218
 require(rgdal)
 proj4string(spdf) <- CRS("+init=epsg:4326")
diff --git a/base/workflow/tests/Rcheck_reference.log b/base/workflow/tests/Rcheck_reference.log
index 3d3e30ec6d6..f6fb0254800 100644
--- a/base/workflow/tests/Rcheck_reference.log
+++ b/base/workflow/tests/Rcheck_reference.log
@@ -70,7 +70,7 @@ Author field differs from that derived from Authors@R
 Maintainer field differs from that derived from Authors@R
   Maintainer: ‘David LeBauer <dlebauer@illinois.edu>’
   Authors@R:  ‘David LeBauer <dlebauer@email.arizona.edu>’
-
+License stub is invalid DCF.
 * checking top-level files ... OK
 * checking for left-over files ... OK
 * checking index information ... OK
diff --git a/docker/depends/pecan_package_dependencies.csv b/docker/depends/pecan_package_dependencies.csv
index f8894581945..54bde494f59 100644
--- a/docker/depends/pecan_package_dependencies.csv
+++ b/docker/depends/pecan_package_dependencies.csv
@@ -122,6 +122,7 @@
 "jsonlite","*","models/stics","Imports",FALSE
 "jsonlite","*","modules/data.atmosphere","Imports",FALSE
 "jsonlite","*","modules/data.remote","Suggests",FALSE
+"knitr","*","base/visualization","Suggests",FALSE
 "knitr",">= 1.42","base/db","Suggests",FALSE
 "knitr",">= 1.42","base/qaqc","Suggests",FALSE
 "knitr",">= 1.42","modules/allometry","Suggests",FALSE
diff --git a/modules/allometry/R/AllomAve.R b/modules/allometry/R/AllomAve.R
index 47ea7354891..f6aba498caf 100644
--- a/modules/allometry/R/AllomAve.R
+++ b/modules/allometry/R/AllomAve.R
@@ -7,16 +7,27 @@
 # http://opensource.ncsa.illinois.edu/license.html
 #-------------------------------------------------------------------------------
 
-#' @title AllomAve
-#' @name  AllomAve
-#' @aliases AllomAve
+#' AllomAve
+#'
+#' Allometery wrapper function that handles loading and subsetting the data,
+#'  fitting the Bayesian models, and generating diagnostic figures. Set up to loop over
+#'   multiple PFTs and components. 
+#'   Writes raw MCMC and PDF of diagnositcs to file and returns table of summary stats.
+#'   
+#' There are two usages of this function. 
+#' When running 'online' (connected to the PEcAn database), pass the database connection,
+#'  con, and the pfts subsection of the PEcAn settings.
+#' When running 'stand alone' pass the pft list mapping species to species codes
+#'  and the file paths to the allometry table and field data (optional)
+#'
 #' @param pfts        pft list from PEcAn settings (if con) OR list of pft spcd's
 #' If the latter, the names within the list are used to identify PFTs
-#' \itemize{
+#' \describe{
 #'   \item{'acronym'}{ - USDA species acronyms (see plants.usda.gov), used with FIELD data (vector)}
 #'   \item{'spcd'}{ - USFS species codes, use with PARM data (vector)}
 #' }
-#' @param components  IDs for allometry components from Jenkins et al 2004 Table 5. Default is stem biomass (6). See data(allom.components)
+#' @param components  IDs for allometry components from Jenkins et al 2004 Table 5.
+#'  Default is stem biomass (6). See data(allom.components)
 #' @param outdir      output directory files are written to. Default is getwd()
 #' @param con         database connection
 #' @param field       path(s) to raw data files
@@ -27,14 +38,6 @@
 #' @param dmax        maximum dbh of interest
 #' @return nested list of parameter summary statistics
 #' @export
-#' @description allometery wrapper function that handles loading and subsetting the data,
-#'  fitting the Bayesian models, and generating diagnostic figures. Set up to loop over
-#'   multiple PFTs and components. 
-#'   Writes raw MCMC and PDF of diagnositcs to file and returns table of summary stats.
-#'   
-#' @details There are two usages of this function. 
-#' When running 'online' (connected to the PEcAn database), pass the database connection, con, and the pfts subsection of the PEcAn settings.
-#' When running 'stand alone' pass the pft list mapping species to species codes and the file paths to the allometry table and field data (optional)
 #' 
 #' @examples 
 #' 
diff --git a/modules/allometry/R/allom.BayesFit.R b/modules/allometry/R/allom.BayesFit.R
index beeb35ea1d4..7ddc1011ac2 100644
--- a/modules/allometry/R/allom.BayesFit.R
+++ b/modules/allometry/R/allom.BayesFit.R
@@ -7,35 +7,38 @@
 # http://opensource.ncsa.illinois.edu/license.html
 #-------------------------------------------------------------------------------
 
-#' @title allom.BayesFit
-#' @name  allom.BayesFit
-#' @aliases allom.BayesFit
+#' allom.BayesFit
 #'
-#' @description Module to fit a common power-law allometric model
+#' Module to fit a common power-law allometric model
 #' to a mixture of raw data and allometric equations
 #' in a Heirarchical Bayes framework with multiple imputation
 #' of the allometric data
 #'
+#' dependencies: requires MCMCpack and mvtnorm
+#'
+#' note: runs 1 chain, but multiple chains can be simulated by
+#'       multiple function calls
+#'
 #' @param allom - object (usually generated by query.allom.data) which
 #'                  needs to be a list with two entries:
 #'            'field' - contains a list, each entry for which is
 #'                      a data frame with 'x' and 'y'. Can be NULL
 #'            'parm' -  a single data frame with the following components:
-#'            \itemize{
-#'                   \item{n}     {sample size}
-#'                   \item{a}     {eqn coefficient}
-#'                   \item{b}     {eqn coefficient}
-#'                   \item{c}     {eqn coefficient}
-#'                   \item{d}     {eqn coefficient}
-#'                   \item{e}     {eqn coefficient}
-#'                   \item{se}    {standard error}
-#'                   \item{eqn}   {sample size}
-#'                   \item{Xmin}  {smallest tree sampled (cm)}
-#'                   \item{Xmax}  {largest tree sampled (cm)}
-#'                   \item{Xcor}  {units correction on X}
-#'                   \item{Ycor}  {units correction on Y}
-#'                   \item{Xtype} {type of measurement on the X}
-#'                   \item{spp}   { - USFS species code}
+#'            \describe{
+#'                   \item{n}{sample size}
+#'                   \item{a}{eqn coefficient}
+#'                   \item{b}{eqn coefficient}
+#'                   \item{c}{eqn coefficient}
+#'                   \item{d}{eqn coefficient}
+#'                   \item{e}{eqn coefficient}
+#'                   \item{se}{standard error}
+#'                   \item{eqn}{sample size}
+#'                   \item{Xmin}{smallest tree sampled (cm)}
+#'                   \item{Xmax}{largest tree sampled (cm)}
+#'                   \item{Xcor}{units correction on X}
+#'                   \item{Ycor}{units correction on Y}
+#'                   \item{Xtype}{type of measurement on the X}
+#'                   \item{spp}{ - USFS species code}
 #'          }
 #' @param nrep - number of MCMC replicates
 #'
@@ -43,11 +46,6 @@
 
 #' @param dmin   minimum dbh of interest
 #' @param dmax   maximum dbh of interest
-
-#' @details  dependencies: requires MCMCpack and mvtnorm
-#'
-#' note: runs 1 chain, but multiple chains can be simulated by
-#'       multiple function calls
 #'
 #' @return returns MCMC chain and ONE instance of 'data'
 #' note: in many cases the estimates are multiply imputed
diff --git a/modules/allometry/R/allom.predict.R b/modules/allometry/R/allom.predict.R
index 16f6210919f..5dc032caa9f 100644
--- a/modules/allometry/R/allom.predict.R
+++ b/modules/allometry/R/allom.predict.R
@@ -7,27 +7,29 @@
 # http://opensource.ncsa.illinois.edu/license.html
 #-------------------------------------------------------------------------------
 
-#' @title allom.predict
-#' @name  allom.predict
-#' @aliases allom.predict
+#' allom.predict
+#'
+#' Function for making tree-level Monte Carlo predictions
+#' from allometric equations estimated from the PEcAn allometry module
 #'
 #' @param object Allometry model object. Option includes
-#'\itemize{
+#'\describe{
 #'   \item{'list of mcmc'}{ - mcmc outputs in a list by PFT then component}
 #'   \item{'vector of file paths'}{ - path(s) to AllomAve RData files}
 #'   \item{'directory where files are located}{ - }
 #' }
 #' @param dbh Diameter at Breast Height (cm)
-#' @param pft Plant Functional Type. Needs to match the name used in AllomAve. Can be NULL if only one PFT/species exists, otherwise needs to the same length as dbh
+#' @param pft Plant Functional Type. Needs to match the name used in AllomAve.
+#'  Can be NULL if only one PFT/species exists, otherwise needs to the same length as dbh
 #' @param component Which component to predict. Can be NULL if only one component was analysed in AllomAve.
 #' @param n Number of Monte Carlo samples. Defaults to the same number as in the MCMC object
 #' @param use  c('Bg','mu','best')
 #' @param interval c('none','confidence','prediction') default is prediction
+#' @param single.tree logical: Is this a DBH time series from one indidual tree?
+#'  If TRUE, will use a fixed error for all draws.
 #'
 #' @return matrix of Monte Carlo predictions that has n rows and one column per DBH
 #'
-#' @description Function for making tree-level Monte Carlo predictions
-#' from allometric equations estimated from the PEcAn allometry module
 #'
 #' @examples
 #'
@@ -240,19 +242,18 @@ allom.predict <- function(object, dbh, pft = NULL, component = NULL, n = NULL, u
   return(out)
 } # allom.predict
 
-#' @title load.allom
-#' @name  load.allom
+#' load.allom
+#'
+#' loads allom files
 #'
 #' @param object Allometry model object. Option includes
-#'\itemize{
+#'\describe{
 #'   \item{'vector of file paths'}{ - path(s) to AllomAve RData files}
 #'   \item{'directory where files are located}{ - }
 #' }
 #'
 #' @return mcmc outputs in a list by PFT then component
 #'
-#' @description loads allom files
-#'
 #' @examples
 #'
 #' \dontrun{
diff --git a/modules/allometry/R/query.allom.data.R b/modules/allometry/R/query.allom.data.R
index f5942adf8e2..8234e0b2dfd 100644
--- a/modules/allometry/R/query.allom.data.R
+++ b/modules/allometry/R/query.allom.data.R
@@ -7,15 +7,16 @@
 # http://opensource.ncsa.illinois.edu/license.html
 #-------------------------------------------------------------------------------
 
-#' @title query.allom.data
-#' @name  query.allom.data
-#' @description
+#' query.allom.data
+#'
 #' Module to grab allometric information from the raw data table
 #' Will grab both original field data and tallied equations
 #'
 #' Tallied equation format based on Jenkins et al 2004 USFS
 #' General Technical Report NE-319
 #'
+#' database is assumed to conform to the PEcAn Schema
+#'
 #' @author Michael Dietze
 #'
 #' @param pft_name   name of Plant Functional Type to be queried
@@ -23,7 +24,6 @@
 #' @param con        open database connection
 #' @param nsim       number of pseudo-data simulations for estimating SE
 #'
-#' @details database is assumed to conform to the PEcAn Schema
 query.allom.data <- function(pft_name, variable, con, nsim = 10000) {
   
   ## check validity of inputs
@@ -80,21 +80,22 @@ query.allom.data <- function(pft_name, variable, con, nsim = 10000) {
   return(allom)
 } # query.allom.data
 
-#' @title nu
-#' @name  nu
+#' nu
+#'
+#' converts factors to numeric
+#'
 #' @param x  data
-#' @description  converts factors to numeric
 nu <- function(x) {
   as.numeric(as.character(x))
 } # nu
 
-#' @title AllomUnitCoef
-#' @name  AllomUnitCoef
-#' @param x   units: mm, cm, cm2, m, in, g, kg, lb, Mg
-#' @param tp  diameter type, leave NULL if DBH. Options: 'd.b.h.^2','cbh','crc'
-#' @description
+#' AllomUnitCoef
+#'
 #'  converts length units FROM cm TO specified units
 #'  converts mass units TO kg FROM specificed units
+#'
+#' @param x   units: mm, cm, cm2, m, in, g, kg, lb, Mg
+#' @param tp  diameter type, leave NULL if DBH. Options: 'd.b.h.^2','cbh','crc'
 AllomUnitCoef <- function(x, tp = NULL) {
   
   y <- rep(1, length(x))
diff --git a/modules/allometry/R/read.allom.data.R b/modules/allometry/R/read.allom.data.R
index 004fe88ebb1..09c560d1c63 100644
--- a/modules/allometry/R/read.allom.data.R
+++ b/modules/allometry/R/read.allom.data.R
@@ -7,13 +7,15 @@
 # http://opensource.ncsa.illinois.edu/license.html
 #-------------------------------------------------------------------------------
 
-#' @title read.allom.data
-#' @name  read.allom.data
+#' read.allom.data
 #' 
-#' @description Extracts PFT- and component-specific data and allometeric equations from the specified files.
+#' Extracts PFT- and component-specific data and allometeric equations from the specified files.
 #' 
+#' This code also estimates the standard error from R-squared, 
+#' which is required to simulate pseudodata from the allometric eqns.
+#'
 #' @param pft.data   PFT dataframe
-#' \itemize{
+#' \describe{
 #'   \item{acronym}{USDA species acronyms, used with FIELD data (vector)}
 #'   \item{spcd}{USFS species codes, use with TALLY data (vector)}
 #' }
@@ -23,8 +25,6 @@
 #' @param nsim       number of Monte Carlo draws in numerical transforms
 #' @return \item{field}{PFT-filtered field Data}
 #'         \item{parm}{Component- and PFT-filtered Allometric Equations}
-#' @details This code also estimates the standard error from R-squared, 
-#' which is required to simulate pseudodata from the allometric eqns.
 read.allom.data <- function(pft.data, component, field, parm, nsim = 10000) {
   
   allom <- list(parm = NULL, field = NULL)
diff --git a/modules/allometry/man/AllomAve.Rd b/modules/allometry/man/AllomAve.Rd
index f3066a2f276..8daa36d9453 100644
--- a/modules/allometry/man/AllomAve.Rd
+++ b/modules/allometry/man/AllomAve.Rd
@@ -20,12 +20,13 @@ AllomAve(
 \arguments{
 \item{pfts}{pft list from PEcAn settings (if con) OR list of pft spcd's
 If the latter, the names within the list are used to identify PFTs
-\itemize{
+\describe{
   \item{'acronym'}{ - USDA species acronyms (see plants.usda.gov), used with FIELD data (vector)}
   \item{'spcd'}{ - USFS species codes, use with PARM data (vector)}
 }}
 
-\item{components}{IDs for allometry components from Jenkins et al 2004 Table 5. Default is stem biomass (6). See data(allom.components)}
+\item{components}{IDs for allometry components from Jenkins et al 2004 Table 5.
+Default is stem biomass (6). See data(allom.components)}
 
 \item{outdir}{output directory files are written to. Default is getwd()}
 
@@ -47,15 +48,16 @@ If the latter, the names within the list are used to identify PFTs
 nested list of parameter summary statistics
 }
 \description{
-allometery wrapper function that handles loading and subsetting the data,
+Allometery wrapper function that handles loading and subsetting the data,
  fitting the Bayesian models, and generating diagnostic figures. Set up to loop over
   multiple PFTs and components. 
   Writes raw MCMC and PDF of diagnositcs to file and returns table of summary stats.
-}
-\details{
+  
 There are two usages of this function. 
-When running 'online' (connected to the PEcAn database), pass the database connection, con, and the pfts subsection of the PEcAn settings.
-When running 'stand alone' pass the pft list mapping species to species codes and the file paths to the allometry table and field data (optional)
+When running 'online' (connected to the PEcAn database), pass the database connection,
+ con, and the pfts subsection of the PEcAn settings.
+When running 'stand alone' pass the pft list mapping species to species codes
+ and the file paths to the allometry table and field data (optional)
 }
 \examples{
 
diff --git a/modules/allometry/man/allom.BayesFit.Rd b/modules/allometry/man/allom.BayesFit.Rd
index 66bc5933740..7baa69ba2af 100644
--- a/modules/allometry/man/allom.BayesFit.Rd
+++ b/modules/allometry/man/allom.BayesFit.Rd
@@ -12,21 +12,21 @@ allom.BayesFit(allom, nrep = 10000, form = "power", dmin = 0.1, dmax = 500)
   'field' - contains a list, each entry for which is
             a data frame with 'x' and 'y'. Can be NULL
   'parm' -  a single data frame with the following components:
-  \itemize{
-         \item{n}     {sample size}
-         \item{a}     {eqn coefficient}
-         \item{b}     {eqn coefficient}
-         \item{c}     {eqn coefficient}
-         \item{d}     {eqn coefficient}
-         \item{e}     {eqn coefficient}
-         \item{se}    {standard error}
-         \item{eqn}   {sample size}
-         \item{Xmin}  {smallest tree sampled (cm)}
-         \item{Xmax}  {largest tree sampled (cm)}
-         \item{Xcor}  {units correction on X}
-         \item{Ycor}  {units correction on Y}
-         \item{Xtype} {type of measurement on the X}
-         \item{spp}   { - USFS species code}
+  \describe{
+         \item{n}{sample size}
+         \item{a}{eqn coefficient}
+         \item{b}{eqn coefficient}
+         \item{c}{eqn coefficient}
+         \item{d}{eqn coefficient}
+         \item{e}{eqn coefficient}
+         \item{se}{standard error}
+         \item{eqn}{sample size}
+         \item{Xmin}{smallest tree sampled (cm)}
+         \item{Xmax}{largest tree sampled (cm)}
+         \item{Xcor}{units correction on X}
+         \item{Ycor}{units correction on Y}
+         \item{Xtype}{type of measurement on the X}
+         \item{spp}{ - USFS species code}
 }}
 
 \item{nrep}{- number of MCMC replicates}
diff --git a/modules/allometry/man/allom.predict.Rd b/modules/allometry/man/allom.predict.Rd
index e48f5d21a51..0c11064a542 100644
--- a/modules/allometry/man/allom.predict.Rd
+++ b/modules/allometry/man/allom.predict.Rd
@@ -17,7 +17,7 @@ allom.predict(
 }
 \arguments{
 \item{object}{Allometry model object. Option includes
-\itemize{
+\describe{
   \item{'list of mcmc'}{ - mcmc outputs in a list by PFT then component}
   \item{'vector of file paths'}{ - path(s) to AllomAve RData files}
   \item{'directory where files are located}{ - }
@@ -25,7 +25,8 @@ allom.predict(
 
 \item{dbh}{Diameter at Breast Height (cm)}
 
-\item{pft}{Plant Functional Type. Needs to match the name used in AllomAve. Can be NULL if only one PFT/species exists, otherwise needs to the same length as dbh}
+\item{pft}{Plant Functional Type. Needs to match the name used in AllomAve.
+Can be NULL if only one PFT/species exists, otherwise needs to the same length as dbh}
 
 \item{component}{Which component to predict. Can be NULL if only one component was analysed in AllomAve.}
 
@@ -34,6 +35,9 @@ allom.predict(
 \item{use}{c('Bg','mu','best')}
 
 \item{interval}{c('none','confidence','prediction') default is prediction}
+
+\item{single.tree}{logical: Is this a DBH time series from one indidual tree?
+If TRUE, will use a fixed error for all draws.}
 }
 \value{
 matrix of Monte Carlo predictions that has n rows and one column per DBH
diff --git a/modules/allometry/man/load.allom.Rd b/modules/allometry/man/load.allom.Rd
index 23389644719..4b9f0415485 100644
--- a/modules/allometry/man/load.allom.Rd
+++ b/modules/allometry/man/load.allom.Rd
@@ -8,7 +8,7 @@ load.allom(object)
 }
 \arguments{
 \item{object}{Allometry model object. Option includes
-\itemize{
+\describe{
   \item{'vector of file paths'}{ - path(s) to AllomAve RData files}
   \item{'directory where files are located}{ - }
 }}
diff --git a/modules/allometry/man/query.allom.data.Rd b/modules/allometry/man/query.allom.data.Rd
index 7185ba0b56e..dfdf8591b1c 100644
--- a/modules/allometry/man/query.allom.data.Rd
+++ b/modules/allometry/man/query.allom.data.Rd
@@ -18,11 +18,11 @@ query.allom.data(pft_name, variable, con, nsim = 10000)
 \description{
 Module to grab allometric information from the raw data table
 Will grab both original field data and tallied equations
-
-Tallied equation format based on Jenkins et al 2004 USFS
-General Technical Report NE-319
 }
 \details{
+Tallied equation format based on Jenkins et al 2004 USFS
+General Technical Report NE-319
+
 database is assumed to conform to the PEcAn Schema
 }
 \author{
diff --git a/modules/allometry/man/read.allom.data.Rd b/modules/allometry/man/read.allom.data.Rd
index e697399a122..cffd3a490ff 100644
--- a/modules/allometry/man/read.allom.data.Rd
+++ b/modules/allometry/man/read.allom.data.Rd
@@ -8,7 +8,7 @@ read.allom.data(pft.data, component, field, parm, nsim = 10000)
 }
 \arguments{
 \item{pft.data}{PFT dataframe
-\itemize{
+\describe{
   \item{acronym}{USDA species acronyms, used with FIELD data (vector)}
   \item{spcd}{USFS species codes, use with TALLY data (vector)}
 }}
diff --git a/modules/allometry/tests/Rcheck_reference.log b/modules/allometry/tests/Rcheck_reference.log
index 8f31a1cde0b..2f3c924d380 100644
--- a/modules/allometry/tests/Rcheck_reference.log
+++ b/modules/allometry/tests/Rcheck_reference.log
@@ -20,7 +20,8 @@ Requires (indirectly) orphaned package: ‘udunits2’
 * checking whether package ‘PEcAn.allometry’ can be installed ... OK
 * checking installed package size ... OK
 * checking package directory ... OK
-* checking DESCRIPTION meta-information ... OK
+* checking DESCRIPTION meta-information ... NOTE
+License stub is invalid DCF.
 * checking top-level files ... OK
 * checking for left-over files ... OK
 * checking index information ... OK
diff --git a/modules/assim.batch/tests/Rcheck_reference.log b/modules/assim.batch/tests/Rcheck_reference.log
index ceaf7cc3ef1..41a05fee54e 100644
--- a/modules/assim.batch/tests/Rcheck_reference.log
+++ b/modules/assim.batch/tests/Rcheck_reference.log
@@ -25,7 +25,8 @@ use conditionally.
 * checking package directory ... OK
 * checking for future file timestamps ... OK
 * checking ‘build’ directory ... OK
-* checking DESCRIPTION meta-information ... OK
+* checking DESCRIPTION meta-information ... NOTE
+License stub is invalid DCF.
 * checking top-level files ... OK
 * checking for left-over files ... OK
 * checking index information ... OK
diff --git a/modules/assim.sequential/R/Analysis_sda.R b/modules/assim.sequential/R/Analysis_sda.R
index 5c209b4d1d7..e2af6bbfd0f 100644
--- a/modules/assim.sequential/R/Analysis_sda.R
+++ b/modules/assim.sequential/R/Analysis_sda.R
@@ -98,6 +98,7 @@ EnKF<-function(settings, Forecast, Observed, H, extraArg=NULL, ...){
 ##' @param settings  pecan standard settings list.  
 ##' @param Forecast A list containing the forecasts variables including Q (process variance) and X (a dataframe of forecast state variables for different ensemble)
 ##' @param Observed A list containing the observed variables including R (cov of observed state variables) and Y (vector of estimated mean of observed state variables)
+##' @param H not used
 ##' @param extraArg This argument is a list containing aqq, bqq and t. The aqq and bqq are shape parameters estimated over time for the process covariance and t gives the time in terms of index of obs.list. See Details.
 ##' @param nitr Number of iterations to run each MCMC chain.
 ##' @param nburnin 	Number of initial, pre-thinning, MCMC iterations to discard.
diff --git a/modules/assim.sequential/R/GEF_Helper.R b/modules/assim.sequential/R/GEF_Helper.R
index 6f4573941f7..9012cc803c5 100644
--- a/modules/assim.sequential/R/GEF_Helper.R
+++ b/modules/assim.sequential/R/GEF_Helper.R
@@ -5,8 +5,9 @@
 #' @param var.names (character) variable names.
 #' @param mu.f (numeric) forecast mean values.
 #' @param Pf (numeric) forecast covariance matrix.
+#' @param t (numeric) timestep. If t=1, initial values are imputed for zero values in mu.f
 #'
-#' @return
+#' @return list with updated mu.f, pf, X, and indication of which y values are censored
 #' @export
 #'
 #' @examples
diff --git a/modules/assim.sequential/R/Multi_Site_Constructors.R b/modules/assim.sequential/R/Multi_Site_Constructors.R
index 29603586007..88a30bd56fb 100755
--- a/modules/assim.sequential/R/Multi_Site_Constructors.R
+++ b/modules/assim.sequential/R/Multi_Site_Constructors.R
@@ -6,6 +6,9 @@
 ##' @param var.names vector names of state variable names.
 ##' @param X a matrix of state variables. In this matrix rows represent ensembles, while columns show the variables for different sites.
 ##' @param localization.FUN This is the function that performs the localization of the Pf matrix and it returns a localized matrix with the same dimensions.
+##' @param t not used
+##' @param blocked.dis passed to `localization.FUN`
+##' @param ... passed to `localization.FUN`
 ##' @description The argument X needs to have an attribute pointing the state variables to their corresponding site. This attribute needs to be called `Site`.
 ##' At the moment, the cov between state variables at blocks defining the cov between two sites are assumed zero.
 ##' @return It returns the var-cov matrix of state variables at multiple sites.
diff --git a/modules/assim.sequential/R/Remote_helpers.R b/modules/assim.sequential/R/Remote_helpers.R
index 139f289f811..107d4f8a7f7 100644
--- a/modules/assim.sequential/R/Remote_helpers.R
+++ b/modules/assim.sequential/R/Remote_helpers.R
@@ -67,6 +67,7 @@ Obs.data.prepare.MultiSite <- function(obs.path, site.ids) {
 #'
 #' @param settingPath The Path to the setting that will run SDA
 #' @param ObsPath  Path to the obs data which is expected to be an .Rdata.
+#' @param run.bash.args Shell commands to be run on the remote host before launching the SDA. See examples
 #'
 #' @export
 #' @return This function returns a list of two pieces of information. One the remote path that SDA is running and the PID of the active run.
diff --git a/modules/assim.sequential/R/hop_test.R b/modules/assim.sequential/R/hop_test.R
index 5b69ad4ee7d..4c79b437e30 100644
--- a/modules/assim.sequential/R/hop_test.R
+++ b/modules/assim.sequential/R/hop_test.R
@@ -4,6 +4,7 @@
 ##' 
 ##' @param settings    SDA PEcAn settings object
 ##' @param nyear       number of years to run hop test over
+##' @param ens.runid   run id. If not provided, is looked up from [settings$outdir]/runs.txt
 ##'
 ##' @description Hop test. This script tests that the model successfully reads it's own restart and can restart without loss of information.
 ##' 
diff --git a/modules/assim.sequential/R/load_data_paleon_sda.R b/modules/assim.sequential/R/load_data_paleon_sda.R
index fb3e8d70ecb..a77596796ed 100644
--- a/modules/assim.sequential/R/load_data_paleon_sda.R
+++ b/modules/assim.sequential/R/load_data_paleon_sda.R
@@ -294,7 +294,7 @@ load_data_paleon_sda <- function(settings){
     
     ### Error Message for no data product
     if(format_id[[i]] != '1000000040' & format_id[[i]] != '1000000058'){
-      PEcAn.logger::logger.severe('ERROR: This data format has not been added to this function (ツ)_/¯ ')
+      PEcAn.logger::logger.severe('ERROR: This data format has not been added to this function :(')
     }
     
   }
diff --git a/modules/assim.sequential/R/sda.enkf.R b/modules/assim.sequential/R/sda.enkf.R
index b6d7ce56040..3dff5d07c79 100644
--- a/modules/assim.sequential/R/sda.enkf.R
+++ b/modules/assim.sequential/R/sda.enkf.R
@@ -1,5 +1,10 @@
-##' @title sda.enkf
-##' @name  sda.enkf
+##' State Variable Data Assimilation: Ensemble Kalman Filter
+##'
+##' Restart mode:  Basic idea is that during a restart (primary case envisioned as an iterative forecast),
+##'  a new workflow folder is created and the previous forecast for the start_time is copied over.
+##' During restart the initial run before the loop is skipped, with the info being populated from the previous run.
+##' The function then dives right into the first Analysis, then continues on like normal.
+##'
 ##' @author Michael Dietze and Ann Raiho \email{dietze@@bu.edu}
 ##' 
 ##' @param settings    PEcAn settings object
@@ -10,11 +15,6 @@
 ##' @param adjustment  flag for using ensemble adjustment filter or not
 ##' @param restart      Used for iterative updating previous forecasts. This is a list that includes ens.inputs, the list of inputs by ensemble member, params, the parameters, and old_outdir, the output directory from the previous workflow. These three things are needed to ensure that if a new workflow is started that ensemble members keep there run-specific met and params. See Details
 ##'
-##’ @details
-##’ Restart mode:  Basic idea is that during a restart (primary case envisioned as an iterative forecast), a new workflow folder is created and the previous forecast for the start_time is copied over. During restart the initial run before the loop is skipped, with the info being populated from the previous run. The function then dives right into the first Analysis, then continues on like normal.
-##' 
-##' @description State Variable Data Assimilation: Ensemble Kalman Filter
-##' 
 ##' 
 ##' @return NONE
 ##' @export
diff --git a/modules/assim.sequential/R/sda.enkf_MultiSite.R b/modules/assim.sequential/R/sda.enkf_MultiSite.R
index b46d954ab95..81b77c6ba33 100644
--- a/modules/assim.sequential/R/sda.enkf_MultiSite.R
+++ b/modules/assim.sequential/R/sda.enkf_MultiSite.R
@@ -1,5 +1,14 @@
-#' @title sda.enkf.multisite
-#' @name  sda.enkf.multisite
+#' State Variable Data Assimilation: Ensemble Kalman Filter and Generalized ensemble filter
+#'
+#' Check out SDA_control function for more details on the control arguments.
+#'
+#' Restart mode:  Basic idea is that during a restart (primary case envisioned
+#' as an iterative forecast), a new workflow folder is created and the previous
+#' forecast for the start_time is copied over. During restart the initial run
+#' before the loop is skipped, with the info being populated from the previous
+#' run. The function then dives right into the first Analysis, then continues
+#' on like normal.
+#' 
 #' @author Michael Dietze, Ann Raiho and Alexis Helgeson \email{dietze@@bu.edu}
 #' 
 #' @param settings  PEcAn settings object
@@ -22,11 +31,7 @@
 #' `forceRun` decide if we want to proceed the Bayesian MCMC sampling without observations;
 #' `run_parallel` decide if we want to run the SDA under parallel mode for the `future_map` function;
 #' `MCMC.args` include lists for controling the MCMC sampling process (iteration, nchains, burnin, and nthin.).
-#'
-#’ @details
-#’ Restart mode:  Basic idea is that during a restart (primary case envisioned as an iterative forecast), a new workflow folder is created and the previous forecast for the start_time is copied over. During restart the initial run before the loop is skipped, with the info being populated from the previous run. The function then dives right into the first Analysis, then continues on like normal.
-#' 
-#' @description State Variable Data Assimilation: Ensemble Kalman Filter and Generalized ensemble filter. Check out SDA_control function for more details on the control arguments.
+#' @param ...       Additional arguments, currently ignored
 #' 
 #' @return NONE
 #' @import nimble furrr
diff --git a/modules/assim.sequential/R/sda.enkf_refactored.R b/modules/assim.sequential/R/sda.enkf_refactored.R
index 836ef604afb..f87b5eecc41 100644
--- a/modules/assim.sequential/R/sda.enkf_refactored.R
+++ b/modules/assim.sequential/R/sda.enkf_refactored.R
@@ -1,25 +1,37 @@
-#' @title sda.enkf
-#' @name  sda.enkf
+#' State Variable Data Assimilation: Ensemble Kalman Filter and Generalized ensemble filter
+#'
+#' Restart mode:  Basic idea is that during a restart (primary case
+#'  envisioned as an iterative forecast), a new workflow folder is created and
+#'  the previous forecast for the start_time is copied over. During restart the
+#'  initial run before the loop is skipped, with the info being populated from
+#'  the previous run. The function then dives right into the first Analysis,
+#'  then continues on like normal.
+#'
 #' @author Michael Dietze and Ann Raiho \email{dietze@@bu.edu}
 #' 
 #' @param settings  PEcAn settings object
-#' @param obs.mean  List of dataframe of observation means, named with observation datetime.
-#' @param obs.cov   List of covariance matrices of state variables , named with observation datetime.
-#' @param Q         Process covariance matrix given if there is no data to estimate it.
-#' @param restart   Used for iterative updating previous forecasts. When the restart is TRUE it read the object in SDA folder written from previous SDA.
-#' @param control   List of flags controlling the behaviour of the SDA. trace for reporting back the SDA outcomes, interactivePlot for plotting the outcomes after each step, 
-#' TimeseriesPlot for post analysis examination, BiasPlot for plotting the correlation between state variables, plot.title is the title of post analysis plots and debug mode allows for pausing the code and examining the variables inside the function.
+#' @param obs.mean  List of dataframe of observation means, named with
+#'  observation datetime.
+#' @param obs.cov   List of covariance matrices of state variables , named with
+#'  observation datetime.
+#' @param Q         Process covariance matrix given if there is no data to
+#'  estimate it.
+#' @param restart   Used for iterative updating previous forecasts. When the
+#'  restart is TRUE it read the object in SDA folder written from previous
+#'  SDA.
+#' @param control   List of flags controlling the behaviour of the SDA. trace
+#'  for reporting back the SDA outcomes, interactivePlot for plotting the
+#'  outcomes after each step, TimeseriesPlot for post analysis examination,
+#'  BiasPlot for plotting the correlation between state variables, plot.title
+#'  is the title of post analysis plots and debug mode allows for pausing the
+#'  code and examining the variables inside the function.
+#' @param ...       Additional arguments, currently ignored
 #'
-#’ @details
-#’ Restart mode:  Basic idea is that during a restart (primary case envisioned as an iterative forecast), a new workflow folder is created and the previous forecast for the start_time is copied over. During restart the initial run before the loop is skipped, with the info being populated from the previous run. The function then dives right into the first Analysis, then continues on like normal.
-#' 
-#' @description State Variable Data Assimilation: Ensemble Kalman Filter and Generalized ensemble filter
 #' 
 #' @return NONE
 #' @import nimble
 #' @export
 #' 
-
 sda.enkf <- function(settings,
                      obs.mean,
                      obs.cov,
diff --git a/modules/assim.sequential/R/sda_plotting.R b/modules/assim.sequential/R/sda_plotting.R
index 5bd30c3c64b..a9a33b614f0 100755
--- a/modules/assim.sequential/R/sda_plotting.R
+++ b/modules/assim.sequential/R/sda_plotting.R
@@ -334,6 +334,7 @@ postana.bias.plotting.sda<-function(settings, t, obs.times, obs.mean, obs.cov, o
 }
 
 ##' @rdname interactive.plotting.sda
+#' @param aqq,bqq shape parameters estimated over time for the process covariance
 ##' @export
 postana.bias.plotting.sda.corr<-function(t, obs.times, X, aqq, bqq){
   
@@ -569,6 +570,8 @@ post.analysis.ggplot.violin <- function(settings, t, obs.times, obs.mean, obs.co
 }
 
 ##' @rdname interactive.plotting.sda
+#' @param facetg logical: Create a subpanel for each variable?
+#' @param readsFF optional forward forecast
 ##' @export
 post.analysis.multisite.ggplot <- function(settings, t, obs.times, obs.mean, obs.cov, FORECAST, ANALYSIS, plot.title=NULL, facetg=FALSE, readsFF=NULL, Add_Map=FALSE){
 
diff --git a/modules/assim.sequential/man/Contruct.Pf.Rd b/modules/assim.sequential/man/Contruct.Pf.Rd
index ea6485de76a..acdaad508e6 100644
--- a/modules/assim.sequential/man/Contruct.Pf.Rd
+++ b/modules/assim.sequential/man/Contruct.Pf.Rd
@@ -22,6 +22,12 @@ Contruct.Pf(
 \item{X}{a matrix of state variables. In this matrix rows represent ensembles, while columns show the variables for different sites.}
 
 \item{localization.FUN}{This is the function that performs the localization of the Pf matrix and it returns a localized matrix with the same dimensions.}
+
+\item{t}{not used}
+
+\item{blocked.dis}{passed to `localization.FUN`}
+
+\item{...}{passed to `localization.FUN`}
 }
 \value{
 It returns the var-cov matrix of state variables at multiple sites.
diff --git a/modules/assim.sequential/man/GEF.Rd b/modules/assim.sequential/man/GEF.Rd
index a0c9f0aad2d..accb0eb31e9 100644
--- a/modules/assim.sequential/man/GEF.Rd
+++ b/modules/assim.sequential/man/GEF.Rd
@@ -25,6 +25,8 @@ GEF.MultiSite(settings, Forecast, Observed, H, extraArg, ...)
 
 \item{Observed}{A list containing the observed variables including R (cov of observed state variables) and Y (vector of estimated mean of observed state variables)}
 
+\item{H}{not used}
+
 \item{extraArg}{This argument is a list containing aqq, bqq and t. The aqq and bqq are shape parameters estimated over time for the process covariance and t gives the time in terms of index of obs.list. See Details.}
 
 \item{nitr}{Number of iterations to run each MCMC chain.}
diff --git a/modules/assim.sequential/man/SDA_remote_launcher.Rd b/modules/assim.sequential/man/SDA_remote_launcher.Rd
index df2b5bb10fb..a90ff7758d0 100644
--- a/modules/assim.sequential/man/SDA_remote_launcher.Rd
+++ b/modules/assim.sequential/man/SDA_remote_launcher.Rd
@@ -10,6 +10,8 @@ SDA_remote_launcher(settingPath, ObsPath, run.bash.args)
 \item{settingPath}{The Path to the setting that will run SDA}
 
 \item{ObsPath}{Path to the obs data which is expected to be an .Rdata.}
+
+\item{run.bash.args}{Shell commands to be run on the remote host before launching the SDA. See examples}
 }
 \value{
 This function returns a list of two pieces of information. One the remote path that SDA is running and the PID of the active run.
diff --git a/modules/assim.sequential/man/hop_test.Rd b/modules/assim.sequential/man/hop_test.Rd
index 7a22ae812a2..b3b804c9632 100644
--- a/modules/assim.sequential/man/hop_test.Rd
+++ b/modules/assim.sequential/man/hop_test.Rd
@@ -9,6 +9,8 @@ hop_test(settings, ens.runid = NULL, nyear)
 \arguments{
 \item{settings}{SDA PEcAn settings object}
 
+\item{ens.runid}{run id. If not provided, is looked up from [settings$outdir]/runs.txt}
+
 \item{nyear}{number of years to run hop test over}
 }
 \value{
diff --git a/modules/assim.sequential/man/interactive.plotting.sda.Rd b/modules/assim.sequential/man/interactive.plotting.sda.Rd
index 295cdd18561..35fabdaab05 100644
--- a/modules/assim.sequential/man/interactive.plotting.sda.Rd
+++ b/modules/assim.sequential/man/interactive.plotting.sda.Rd
@@ -128,8 +128,14 @@ SDA_timeseries_plot(
 
 \item{ANALYSIS}{Analysis object from the sda.output.Rdata.}
 
+\item{aqq, bqq}{shape parameters estimated over time for the process covariance}
+
 \item{plot.title}{character giving the title for post visualization ggplots}
 
+\item{facetg}{logical: Create a subpanel for each variable?}
+
+\item{readsFF}{optional forward forecast}
+
 \item{Add_Map}{Bool variable decide if we want to export the GIS map of Ecoregion.}
 
 \item{outdir}{physical path where the pdf will be stored.}
diff --git a/modules/assim.sequential/man/sda.enkf.Rd b/modules/assim.sequential/man/sda.enkf.Rd
index c3757f0497b..999685cca38 100644
--- a/modules/assim.sequential/man/sda.enkf.Rd
+++ b/modules/assim.sequential/man/sda.enkf.Rd
@@ -1,20 +1,9 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/sda.enkf.R, R/sda.enkf_refactored.R
+% Please edit documentation in R/sda.enkf_refactored.R
 \name{sda.enkf}
 \alias{sda.enkf}
-\alias{sda.enkf.original}
-\title{sda.enkf}
+\title{State Variable Data Assimilation: Ensemble Kalman Filter and Generalized ensemble filter}
 \usage{
-sda.enkf.original(
-  settings,
-  obs.mean,
-  obs.cov,
-  IC = NULL,
-  Q = NULL,
-  adjustment = TRUE,
-  restart = NULL
-)
-
 sda.enkf(
   settings,
   obs.mean,
@@ -29,30 +18,38 @@ sda.enkf(
 \arguments{
 \item{settings}{PEcAn settings object}
 
-\item{obs.mean}{List of dataframe of observation means, named with observation datetime.}
-
-\item{obs.cov}{List of covariance matrices of state variables , named with observation datetime.}
+\item{obs.mean}{List of dataframe of observation means, named with
+observation datetime.}
 
-\item{IC}{initial conditions}
+\item{obs.cov}{List of covariance matrices of state variables , named with
+observation datetime.}
 
-\item{Q}{Process covariance matrix given if there is no data to estimate it.}
+\item{Q}{Process covariance matrix given if there is no data to
+estimate it.}
 
-\item{adjustment}{flag for using ensemble adjustment filter or not}
+\item{restart}{Used for iterative updating previous forecasts. When the
+restart is TRUE it read the object in SDA folder written from previous
+SDA.}
 
-\item{restart}{Used for iterative updating previous forecasts. When the restart is TRUE it read the object in SDA folder written from previous SDA.}
+\item{control}{List of flags controlling the behaviour of the SDA. trace
+for reporting back the SDA outcomes, interactivePlot for plotting the
+outcomes after each step, TimeseriesPlot for post analysis examination,
+BiasPlot for plotting the correlation between state variables, plot.title
+is the title of post analysis plots and debug mode allows for pausing the
+code and examining the variables inside the function.}
 
-\item{control}{List of flags controlling the behaviour of the SDA. trace for reporting back the SDA outcomes, interactivePlot for plotting the outcomes after each step, 
-TimeseriesPlot for post analysis examination, BiasPlot for plotting the correlation between state variables, plot.title is the title of post analysis plots and debug mode allows for pausing the code and examining the variables inside the function.}
+\item{...}{Additional arguments, currently ignored}
 }
 \value{
-NONE
-
 NONE
 }
 \description{
-State Variable Data Assimilation: Ensemble Kalman Filter
-
-State Variable Data Assimilation: Ensemble Kalman Filter and Generalized ensemble filter
+Restart mode:  Basic idea is that during a restart (primary case
+ envisioned as an iterative forecast), a new workflow folder is created and
+ the previous forecast for the start_time is copied over. During restart the
+ initial run before the loop is skipped, with the info being populated from
+ the previous run. The function then dives right into the first Analysis,
+ then continues on like normal.
 }
 \author{
 Michael Dietze and Ann Raiho \email{dietze@bu.edu}
diff --git a/modules/assim.sequential/man/sda.enkf.multisite.Rd b/modules/assim.sequential/man/sda.enkf.multisite.Rd
index 9c3560b3e89..81b79f1c1a1 100644
--- a/modules/assim.sequential/man/sda.enkf.multisite.Rd
+++ b/modules/assim.sequential/man/sda.enkf.multisite.Rd
@@ -2,7 +2,7 @@
 % Please edit documentation in R/sda.enkf_MultiSite.R
 \name{sda.enkf.multisite}
 \alias{sda.enkf.multisite}
-\title{sda.enkf.multisite}
+\title{State Variable Data Assimilation: Ensemble Kalman Filter and Generalized ensemble filter}
 \usage{
 sda.enkf.multisite(
   settings,
@@ -46,12 +46,22 @@ sda.enkf.multisite(
 `forceRun` decide if we want to proceed the Bayesian MCMC sampling without observations;
 `run_parallel` decide if we want to run the SDA under parallel mode for the `future_map` function;
 `MCMC.args` include lists for controling the MCMC sampling process (iteration, nchains, burnin, and nthin.).}
+
+\item{...}{Additional arguments, currently ignored}
 }
 \value{
 NONE
 }
 \description{
-State Variable Data Assimilation: Ensemble Kalman Filter and Generalized ensemble filter. Check out SDA_control function for more details on the control arguments.
+Check out SDA_control function for more details on the control arguments.
+}
+\details{
+Restart mode:  Basic idea is that during a restart (primary case envisioned
+as an iterative forecast), a new workflow folder is created and the previous
+forecast for the start_time is copied over. During restart the initial run
+before the loop is skipped, with the info being populated from the previous
+run. The function then dives right into the first Analysis, then continues
+on like normal.
 }
 \author{
 Michael Dietze, Ann Raiho and Alexis Helgeson \email{dietze@bu.edu}
diff --git a/modules/assim.sequential/man/tobit_model_censored.Rd b/modules/assim.sequential/man/tobit_model_censored.Rd
index abab60dfebd..f1067af054e 100644
--- a/modules/assim.sequential/man/tobit_model_censored.Rd
+++ b/modules/assim.sequential/man/tobit_model_censored.Rd
@@ -16,6 +16,11 @@ tobit_model_censored(settings, X, var.names, mu.f, Pf, t)
 \item{mu.f}{(numeric) forecast mean values.}
 
 \item{Pf}{(numeric) forecast covariance matrix.}
+
+\item{t}{(numeric) timestep. If t=1, initial values are imputed for zero values in mu.f}
+}
+\value{
+list with updated mu.f, pf, X, and indication of which y values are censored
 }
 \description{
 tobit_model_censored
diff --git a/modules/assim.sequential/tests/Rcheck_reference.log b/modules/assim.sequential/tests/Rcheck_reference.log
index 580fe6dc7b8..2efbc68b8c0 100644
--- a/modules/assim.sequential/tests/Rcheck_reference.log
+++ b/modules/assim.sequential/tests/Rcheck_reference.log
@@ -20,17 +20,13 @@
 * checking installed package size ... OK
 * checking package directory ... OK
 * checking for future file timestamps ... OK
-* checking DESCRIPTION meta-information ... OK
+* checking DESCRIPTION meta-information ... NOTE
+License stub is invalid DCF.
 * checking top-level files ... OK
 * checking for left-over files ... OK
 * checking index information ... OK
 * checking package subdirectories ... OK
-* checking R files for non-ASCII characters ... WARNING
-Found the following file with non-ASCII characters:
-  load_data_paleon_sda.R
-Portable packages must use only ASCII characters in their R code,
-except perhaps in comments.
-Use \uxxxx escapes for other characters.
+* checking R files for non-ASCII characters ... OK
 * checking R files for syntax errors ... OK
 * checking whether the package can be loaded ... OK
 * checking whether the package can be loaded with stated dependencies ... OK
diff --git a/modules/benchmark/tests/Rcheck_reference.log b/modules/benchmark/tests/Rcheck_reference.log
index 825ed0601a0..26e595f3099 100644
--- a/modules/benchmark/tests/Rcheck_reference.log
+++ b/modules/benchmark/tests/Rcheck_reference.log
@@ -20,7 +20,8 @@
 * checking installed package size ... OK
 * checking package directory ... OK
 * checking for future file timestamps ... OK
-* checking DESCRIPTION meta-information ... OK
+* checking DESCRIPTION meta-information ... NOTE
+License stub is invalid DCF.
 * checking top-level files ... OK
 * checking for left-over files ... OK
 * checking index information ... OK
diff --git a/modules/data.atmosphere/DESCRIPTION b/modules/data.atmosphere/DESCRIPTION
index 893b7b1da18..32ecfce8a76 100644
--- a/modules/data.atmosphere/DESCRIPTION
+++ b/modules/data.atmosphere/DESCRIPTION
@@ -63,6 +63,7 @@ Suggests:
     foreach,
     furrr,
     future,
+    knitr,
     mockery,
     parallel,
     PEcAn.settings,
@@ -79,5 +80,6 @@ License: BSD_3_clause + file LICENSE
 Copyright: Authors
 LazyLoad: yes
 LazyData: FALSE
+VignetteBuilder: knitr
 Encoding: UTF-8
 RoxygenNote: 7.3.2
diff --git a/modules/data.atmosphere/R/closest_xy.R b/modules/data.atmosphere/R/closest_xy.R
index a9a18442beb..96c02512736 100644
--- a/modules/data.atmosphere/R/closest_xy.R
+++ b/modules/data.atmosphere/R/closest_xy.R
@@ -1,8 +1,11 @@
 ##' Given latitude and longitude coordinates, find NARR x and y indices
 ##'
+##' @param slat,slon site location, in decimal degrees
+##' @param infolder path to folder containing infile
+##' @param infile pattern to match for filename inside infolder.
+##'   Only the first file matching this pattern AND ending with '.nc'
+##'   will be used
 ##'
-##' @name closest_xy
-##' @title closest_xy
 ##' @export
 ##' @author Betsy Cowdery, Ankur Desai
 closest_xy <- function(slat, slon, infolder, infile) {
diff --git a/modules/data.atmosphere/R/download.Ameriflux.R b/modules/data.atmosphere/R/download.Ameriflux.R
index 38ab416a08c..e56e63158d1 100644
--- a/modules/data.atmosphere/R/download.Ameriflux.R
+++ b/modules/data.atmosphere/R/download.Ameriflux.R
@@ -18,6 +18,7 @@ download.Ameriflux.site <- function(site_id) {
 ##' @param end_date the end date of the data to be downloaded. Format is YYYY-MM-DD (will only use the year part of the date)
 ##' @param overwrite should existing files be overwritten
 ##' @param verbose should the function be very verbose
+##' @param ... further arguments, currently ignored
 ##' 
 ##' @author Josh Mantooth, Rob Kooper, Ankur Desai
 download.Ameriflux <- function(sitename, outfolder, start_date, end_date,
diff --git a/modules/data.atmosphere/R/download.AmerifluxLBL.R b/modules/data.atmosphere/R/download.AmerifluxLBL.R
index 4fe706a1f33..677266ffa8a 100644
--- a/modules/data.atmosphere/R/download.AmerifluxLBL.R
+++ b/modules/data.atmosphere/R/download.AmerifluxLBL.R
@@ -20,6 +20,7 @@
 ##' @param useremail Used email, should include 'address sign' for code to be functional
 ##' @param data_product AmeriFlux data product
 ##' @param data_policy Two possible licenses (based on the site): 'CCBY4.0' or 'LEGACY'
+##' @param ... further arguments, currently ignored
 ##'
 ##' @examples
 ##' \dontrun{
diff --git a/modules/data.atmosphere/R/download.ERA5.R b/modules/data.atmosphere/R/download.ERA5.R
index 533aeac96fc..87c6e7a9d3f 100644
--- a/modules/data.atmosphere/R/download.ERA5.R
+++ b/modules/data.atmosphere/R/download.ERA5.R
@@ -30,6 +30,8 @@
 #' @return Character vector of file names containing raw, downloaded
 #'   data (invisibly)
 #' @author Alexey Shiklomanov
+#' @md 
+    # ^ tells Roxygen to interpret this fn's doc block as Markdown
 #' @export
 #' @examples
 #' \dontrun{
diff --git a/modules/data.atmosphere/R/download.Fluxnet2015.R b/modules/data.atmosphere/R/download.Fluxnet2015.R
index fb98328ee30..80bde7b6714 100644
--- a/modules/data.atmosphere/R/download.Fluxnet2015.R
+++ b/modules/data.atmosphere/R/download.Fluxnet2015.R
@@ -10,6 +10,8 @@
 ##' @param end_date the end date of the data to be downloaded. Format is YYYY-MM-DD (will only use the year part of the date)
 ##' @param overwrite should existing files be overwritten
 ##' @param verbose should the function be very verbose
+##' @param username login name for Ameriflux
+##' @param ... further arguments, currently ignored
 ##' 
 ##' @author Ankur Desai, based on download.Ameriflux.R by Josh Mantooth, Rob Kooper
 download.Fluxnet2015 <- function(sitename, outfolder, start_date, end_date, 
diff --git a/modules/data.atmosphere/R/download.FluxnetLaThuile.R b/modules/data.atmosphere/R/download.FluxnetLaThuile.R
index 70134b3604a..44d065a46cb 100644
--- a/modules/data.atmosphere/R/download.FluxnetLaThuile.R
+++ b/modules/data.atmosphere/R/download.FluxnetLaThuile.R
@@ -18,6 +18,7 @@ download.FluxnetLaThuile.site <- function(site_id) {
 ##' @param overwrite should existing files be overwritten
 ##' @param verbose should the function be very verbose
 ##' @param username should be the registered Fluxnet username, else defaults to pecan
+##' @param ... further arguments, currently ignored
 ##' 
 ##' @author Ankur Desai
 download.FluxnetLaThuile <- function(sitename, outfolder, start_date, end_date, 
diff --git a/modules/data.atmosphere/R/download.GFDL.R b/modules/data.atmosphere/R/download.GFDL.R
index 2f0c975e85d..dd1640ae7ae 100644
--- a/modules/data.atmosphere/R/download.GFDL.R
+++ b/modules/data.atmosphere/R/download.GFDL.R
@@ -13,6 +13,8 @@
 #' @param model Which GFDL model to run (options are CM3, ESM2M, ESM2G)
 #' @param scenario Which scenario to run (options are rcp26, rcp45, rcp60, rcp85)
 #' @param ensemble_member Which ensemble_member to initialize the run (options are r1i1p1, r3i1p1, r5i1p1)
+#' @param ... further arguments, currently ignored
+#'
 #' @author James Simkins, Alexey Shiklomanov, Ankur Desai
 download.GFDL <- function(outfolder, start_date, end_date, lat.in, lon.in,
                           overwrite = FALSE, verbose = FALSE,
diff --git a/modules/data.atmosphere/R/download.NARR_site.R b/modules/data.atmosphere/R/download.NARR_site.R
index 9de280c15d7..bf2fdb1b51d 100644
--- a/modules/data.atmosphere/R/download.NARR_site.R
+++ b/modules/data.atmosphere/R/download.NARR_site.R
@@ -7,9 +7,12 @@
 #' @param lon.in Site longitude coordinate
 #' @param overwrite Overwrite existing files?  Default=FALSE
 #' @param verbose Turn on verbose output? Default=FALSE
+#' @param progress Whether or not to show a progress bar.
+#' Requires the `progress` package to be installed.
 #' @param parallel Download in parallel? Default = TRUE
 #' @param ncores Number of cores for parallel download. Default is
 #' `parallel::detectCores()`
+#' @param ... further arguments, currently ignored
 #'
 #' @examples
 #'
@@ -345,7 +348,7 @@ generate_narr_url <- function(dates, flx) {
     dplyr::select("startdate", "url")
 }
 
-#' Assign daygroup tag for a given date
+# Assign daygroup tag for a given date
 daygroup <- function(date, flx) {
   mday <- lubridate::mday(date)
   mmax <- lubridate::days_in_month(date)
diff --git a/modules/data.atmosphere/R/download.NEONmet.R b/modules/data.atmosphere/R/download.NEONmet.R
index f1262701790..be132a1a5ee 100644
--- a/modules/data.atmosphere/R/download.NEONmet.R
+++ b/modules/data.atmosphere/R/download.NEONmet.R
@@ -12,6 +12,8 @@
 ##' @param end_date the end date of the data to be downloaded. Format is YYYY-MM-DD (will only use the year and month part of the date)
 ##' @param overwrite should existing files be overwritten
 ##' @param verbose makes the function output more text
+##' @param ... further arguments, currently ignored
+##'
 ##' @examples 
 ##' \dontrun{
 ##' result <- download.NEONmet('HARV','~/','2017-01-01','2017-01-31',overwrite=TRUE)
diff --git a/modules/data.atmosphere/R/download.PalEON.R b/modules/data.atmosphere/R/download.PalEON.R
index 50a55ffe9c5..3618d2193b7 100644
--- a/modules/data.atmosphere/R/download.PalEON.R
+++ b/modules/data.atmosphere/R/download.PalEON.R
@@ -20,7 +20,9 @@ download.PalEON <- function(sitename, outfolder, start_date, end_date, overwrite
   else if (sitename == "Howland Forest- main tower (US-Ho1) (PalEON PHO)") {
     site <- "PHO"
   }  # 0-759
-  else if (sitename == "Billy’s Lake (PalEON PBL)") {
+  else if (sitename == "Billy\U2019s Lake (PalEON PBL)") {
+    #\U2019 = curly right single-quote, escaped to keep R from complaining about non-ASCII in code files
+    # (yes, the curly quote is present in the DB sitename)
     site <- "PBL"
   }  # 1-672 done
   else if (sitename == "Deming Lake (PalEON PDL)") {
diff --git a/modules/data.atmosphere/R/extract.nc.R b/modules/data.atmosphere/R/extract.nc.R
index 3f4651175c5..c2f1f7c5001 100644
--- a/modules/data.atmosphere/R/extract.nc.R
+++ b/modules/data.atmosphere/R/extract.nc.R
@@ -11,6 +11,8 @@
 ##' @param slon the longitude of the site
 ##' @param overwrite should existing files be overwritten
 ##' @param verbose should ouput of function be extra verbose
+##' @param ... further arguments, currently ignored
+##'
 ##' @export
 ##' @author Betsy Cowdery
 extract.nc <- function(in.path, in.prefix, outfolder, start_date, end_date, slat, slon,
diff --git a/modules/data.atmosphere/R/extract_ERA5.R b/modules/data.atmosphere/R/extract_ERA5.R
index 4bc5a9f05a0..fe5e5b7b5ab 100644
--- a/modules/data.atmosphere/R/extract_ERA5.R
+++ b/modules/data.atmosphere/R/extract_ERA5.R
@@ -6,13 +6,16 @@
 #' @param start_date start date
 #' @param end_date end date
 #' @param outfolder Path to directory where nc files need to be saved.
-#' @param in.prefix initial portion of the filename that does not vary by date. Does not include directory; specify that as part of in.path.
+#' @param in.prefix initial portion of the filename that does not vary by date.
+#'  Does not include directory; specify that as part of in.path.
 #' @param newsite site name.
-#' @param vars variables to be extracted. If NULL all the variables will be returned.
+#' @param vars variables to be extracted. If NULL all the variables will be
+#'  returned.
 #' @param overwrite Logical if files needs to be overwritten.
 #' @param verbose Decide if we want to stop printing info.
 #' @param ... other inputs.
-#' @details For the list of variables check out the documentation at \link{https://confluence.ecmwf.int/display/CKB/ERA5+data+documentation#ERA5datadocumentation-Spatialgrid}
+#' @details For the list of variables check out the documentation at \url{
+#'  https://confluence.ecmwf.int/display/CKB/ERA5+data+documentation#ERA5datadocumentation-Spatialgrid}
 #'
 #' @return a list of xts objects with all the variables for the requested years
 #' @export
diff --git a/modules/data.atmosphere/R/lightME.R b/modules/data.atmosphere/R/lightME.R
index e7e6667bdd4..842263a718e 100644
--- a/modules/data.atmosphere/R/lightME.R
+++ b/modules/data.atmosphere/R/lightME.R
@@ -13,7 +13,7 @@
 ##' @param alpha atmospheric transmittance, default 0.85.
 ##' @export
 ##' @return a \code{\link{list}} structure with components:
-##' \itemize{
+##' \describe{
 ##'  \item{'I.dir'}{Direct radiation (\eqn{\mu} mol \eqn{m^{-2}s^{-1}}}
 ##'  \item{'I.diff'}{Indirect (diffuse) radiation (\eqn{\mu} mol\eqn{m^{-2}s^{-1}}}
 ##'  \item{'cos.th'}{cosine of \eqn{\theta}, solar zenith angle.}
diff --git a/modules/data.atmosphere/R/merge.met.variable.R b/modules/data.atmosphere/R/merge.met.variable.R
index 13e8c404fb2..dbd369f3769 100644
--- a/modules/data.atmosphere/R/merge.met.variable.R
+++ b/modules/data.atmosphere/R/merge.met.variable.R
@@ -1,23 +1,22 @@
 #' Merge a new met variable from an external file (e.g. CO2) into existing met files
 #'
+#' Currently modifies the files IN PLACE rather than creating a new copy of the files and a new DB record.
+#' Currently unit and name checking only implemented for CO2. 
+#' Currently does not yet support merge data that has lat/lon
+#' New variable only has time dimension and thus MIGHT break downstream code....
+#'
 #' @param in.path     path to original data
 #' @param in.prefix   prefix of original data
-#' @param start_date  
-#' @param end_date 
+#' @param start_date,end_date date (or character in a standard date format). Only year component is used.
 #' @param merge.file  path of file to be merged in
 #' @param overwrite logical: replace output file if it already exists? 
 #' @param verbose logical: should \code{\link[ncdf4:ncdf4-package]{ncdf4}} functions
 #'   print debugging information as they run? 
-#' @param ... 
+#' @param ... other arguments, currently ignored
 #'
 #' @return Currently nothing. TODO: Return a data frame summarizing the merged files.
 #' @export
 #'
-#' @details Currently modifies the files IN PLACE rather than creating a new copy of the files an a new DB record. 
-#' Currently unit and name checking only implemented for CO2. 
-#' Currently does not yet support merge data that has lat/lon
-#' New variable only has time dimension and thus MIGHT break downstream code....
-#'
 #' @examples
 #' \dontrun{
 #' in.path    <- "~/paleon/PalEONregional_CF_site_1-24047/"
diff --git a/modules/data.atmosphere/R/met.process.R b/modules/data.atmosphere/R/met.process.R
index 8e1408b3887..3286eb5176a 100644
--- a/modules/data.atmosphere/R/met.process.R
+++ b/modules/data.atmosphere/R/met.process.R
@@ -19,6 +19,8 @@
 ##'        *except* raw met downloads. I.e., it corresponds to:
 ##'
 ##'        list(download = FALSE, met2cf = TRUE, standardize = TRUE,  met2model = TRUE)
+##' @param browndog login info for the Browndog conversion service, if used.
+##'  List of `url`, `username`, `password`
 ##' @importFrom rlang .data .env
 ##' @export
 ##' @author Elizabeth Cowdery, Michael Dietze, Ankur Desai, James Simkins, Ryan Kelly
diff --git a/modules/data.atmosphere/R/met2CF.ALMA.R b/modules/data.atmosphere/R/met2CF.ALMA.R
index 5b47429ece7..d674d071b9c 100644
--- a/modules/data.atmosphere/R/met2CF.ALMA.R
+++ b/modules/data.atmosphere/R/met2CF.ALMA.R
@@ -20,6 +20,8 @@ insertPmet <- function(vals, nc2, var2, dim2, units2 = NA, conv = NULL,
 ##' @param start_date the start date of the data to be downloaded (will only use the year part of the date)
 ##' @param end_date the end date of the data to be downloaded (will only use the year part of the date)
 ##' @param overwrite should existing files be overwritten
+##' @param verbose logical: enable verbose mode for netcdf writer functions?
+##' @param ... further arguments, currently ignored
 ##'
 ##' @author Mike Dietze
 met2CF.PalEONregional <- function(in.path, in.prefix, outfolder, start_date, end_date, overwrite = FALSE,
@@ -179,7 +181,10 @@ met2CF.PalEONregional <- function(in.path, in.prefix, outfolder, start_date, end
 ##' @param outfolder location on disk where outputs will be stored
 ##' @param start_date the start date of the data to be downloaded (will only use the year part of the date)
 ##' @param end_date the end date of the data to be downloaded (will only use the year part of the date)
+##' @param lat,lon site location in decimal degrees. Caution: both must have length one.
 ##' @param overwrite should existing files be overwritten
+##' @param verbose logical: enable verbose mode for netcdf writer functions?
+##' @param ... further arguments, currently ignored
 ##'
 ##' @author Mike Dietze
 met2CF.PalEON <- function(in.path, in.prefix, outfolder, start_date, end_date, lat, lon, overwrite = FALSE,
@@ -373,6 +378,7 @@ met2CF.PalEON <- function(in.path, in.prefix, outfolder, start_date, end_date, l
 ##' @param start_date the start date of the data to be downloaded (will only use the year part of the date)
 ##' @param end_date the end date of the data to be downloaded (will only use the year part of the date)
 ##' @param overwrite should existing files be overwritten
+##' @param verbose logical: enable verbose mode for netcdf writer functions?
 ##'
 ##' @author Mike Dietze
 met2CF.ALMA <- function(in.path, in.prefix, outfolder, start_date, end_date, overwrite = FALSE, verbose = FALSE) {
diff --git a/modules/data.atmosphere/R/met2CF.Ameriflux.R b/modules/data.atmosphere/R/met2CF.Ameriflux.R
index 9686b6e7732..88e873d4a3d 100644
--- a/modules/data.atmosphere/R/met2CF.Ameriflux.R
+++ b/modules/data.atmosphere/R/met2CF.Ameriflux.R
@@ -69,6 +69,7 @@ getLatLon <- function(nc1) {
 ##' @param end_date the end date of the data to be downloaded (will only use the year part of the date)
 ##' @param overwrite should existing files be overwritten
 ##' @param verbose should ouput of function be extra verbose
+##' @param ... further arguments, currently ignored
 ##'
 ##' @author Josh Mantooth, Mike Dietze, Elizabeth Cowdery, Ankur Desai
 met2CF.Ameriflux <- function(in.path, in.prefix, outfolder, start_date, end_date,
diff --git a/modules/data.atmosphere/R/met2CF.AmerifluxLBL.R b/modules/data.atmosphere/R/met2CF.AmerifluxLBL.R
index abbb81a42c6..69f5c882304 100644
--- a/modules/data.atmosphere/R/met2CF.AmerifluxLBL.R
+++ b/modules/data.atmosphere/R/met2CF.AmerifluxLBL.R
@@ -29,6 +29,7 @@
 
 ##' @param overwrite should existing files be overwritten
 ##' @param verbose should ouput of function be extra verbose
+##' @param ... further arguments, currently ignored
 ##'
 ##' @author Ankur Desai
 met2CF.AmerifluxLBL <- function(in.path, in.prefix, outfolder, start_date, end_date, format,
diff --git a/modules/data.atmosphere/R/metgapfill.NOAA_GEFS.R b/modules/data.atmosphere/R/metgapfill.NOAA_GEFS.R
index 1e573a65706..820028978ba 100644
--- a/modules/data.atmosphere/R/metgapfill.NOAA_GEFS.R
+++ b/modules/data.atmosphere/R/metgapfill.NOAA_GEFS.R
@@ -1,19 +1,21 @@
-##'@title Gapfill NOAA_GEFS weather data
-##'@section Purpose:
-##'This function uses simple methods to gapfill NOAA GEFS met data
-##'Temperature and Precipitation are gapfilled with spline; other data sources are gapfilled with
-##'using linear models fitted to other fitted data.
-##'
-##'@param in.prefix the met file name
-##'@param in.path The location of the file
-##'@param outfolder The place to write the output file to
-##'@param start_date The start date of the contents of the file
-##'@param end_date The end date of the contents of the file
-##'@param overwrite Whether or not to overwrite the output file if it exists or not
-##'@param verbose Passed to nc writing functions for additional output
-##'@export
-##'
-##'@author Luke Dramko
+#' Gapfill NOAA_GEFS weather data
+#'
+#' This function uses simple methods to gapfill NOAA GEFS met data.
+#' Temperature and Precipitation are gapfilled with splines;
+#'  other data sources are gapfilled using linear models fitted to
+#'  other fitted data.
+#'
+#' @param in.prefix the met file name
+#' @param in.path The location of the file
+#' @param outfolder The place to write the output file to
+#' @param start_date The start date of the contents of the file
+#' @param end_date The end date of the contents of the file
+#' @param overwrite Whether or not to overwrite the output file if it exists or not
+#' @param verbose Passed to nc writing functions for additional output
+#' @param ... further arguments, currently ignored
+#'
+#' @author Luke Dramko
+#' @export
 metgapfill.NOAA_GEFS <- function(in.prefix, in.path, outfolder, start_date, end_date,
                                  overwrite = FALSE, verbose = FALSE, ...) {
   
diff --git a/modules/data.atmosphere/R/metgapfill.R b/modules/data.atmosphere/R/metgapfill.R
index db53eebe875..9a4bc711df7 100644
--- a/modules/data.atmosphere/R/metgapfill.R
+++ b/modules/data.atmosphere/R/metgapfill.R
@@ -13,6 +13,8 @@
 ##' @param overwrite should existing files be overwritten
 ##' @param verbose should the function be very verbose
 ##' @param lst is timezone offset from UTC, if timezone is available in time:units attribute in file, it will use that, default is to assume UTC
+##' @param ... further arguments, currently ignored
+##'
 ##' @author Ankur Desai
 metgapfill <- function(in.path, in.prefix, outfolder, start_date, end_date, lst = 0,
                        overwrite = FALSE, verbose = FALSE, ...) {
diff --git a/modules/data.atmosphere/R/nc_merge.R b/modules/data.atmosphere/R/nc_merge.R
index e532e66d69d..963ea9e998f 100644
--- a/modules/data.atmosphere/R/nc_merge.R
+++ b/modules/data.atmosphere/R/nc_merge.R
@@ -23,6 +23,8 @@
 ##' @param overwrite logical: replace output file if it already exists?
 ##' @param verbose logical: should \code{\link[ncdf4:ncdf4-package]{ncdf4}}
 ##'   functions print debugging information as they run?
+##' @param ... further arguments, currently ignored
+##'
 ##' @export
 # -----------------------------------
 #----------------------------------------------------------------------
diff --git a/modules/data.atmosphere/R/permute.nc.R b/modules/data.atmosphere/R/permute.nc.R
index 2ed515968f1..24a3a96c117 100644
--- a/modules/data.atmosphere/R/permute.nc.R
+++ b/modules/data.atmosphere/R/permute.nc.R
@@ -10,6 +10,7 @@
 ##' @param end_date the end date of the data to be permuted (will only use the year part of the date)
 ##' @param overwrite should existing files be overwritten
 ##' @param verbose should ouput of function be extra verbose
+##' @param ... further arguments, currently ignored
 ##'
 ##' @author Elizabeth Cowdery, Rob Kooper
 permute.nc <- function(in.path, in.prefix, outfolder, start_date, end_date, 
diff --git a/modules/data.atmosphere/R/split_wind.R b/modules/data.atmosphere/R/split_wind.R
index 427d8a57aa4..a43de879ec8 100644
--- a/modules/data.atmosphere/R/split_wind.R
+++ b/modules/data.atmosphere/R/split_wind.R
@@ -1,9 +1,10 @@
 #' Split wind_speed into eastward_wind and northward_wind
 #'
+#' Currently modifies the files IN PLACE rather than creating a new copy of the files and a new DB record.
+#'
 #' @param in.path     path to original data
 #' @param in.prefix   prefix of original data
-#' @param start_date  
-#' @param end_date 
+#' @param start_date,end_date date (or character in a standard date format). Only year component is used.
 #' @param overwrite logical: replace output file if it already exists? 
 #' @param verbose logical: should \code{\link[ncdf4:ncdf4-package]{ncdf4}} functions print debugging information as they run? 
 #' @param ... other arguments, currently ignored
@@ -11,7 +12,6 @@
 #' @return nothing. TODO: Return data frame summarizing results
 #' @export
 #'
-#' @details Currently modifies the files IN PLACE rather than creating a new copy of the files an a new DB record. 
 #'
 #' @examples
 #' \dontrun{
diff --git a/modules/data.atmosphere/R/tdm_generate_subdaily_models.R b/modules/data.atmosphere/R/tdm_generate_subdaily_models.R
index 35031f492e3..87e45b405d2 100644
--- a/modules/data.atmosphere/R/tdm_generate_subdaily_models.R
+++ b/modules/data.atmosphere/R/tdm_generate_subdaily_models.R
@@ -1,48 +1,38 @@
-##' Generate Subdaily Models
-##' Create statistical models to predict subdaily meteorology
-# -----------------------------------
-# Description
-# -----------------------------------
-##'
-##' @title gen.subdaily.models
-##' @family tdm - Temporally Downscale Meteorology
-##' @author Christy Rollinson, James Simkins
-##' @description This is the 2nd function in the tdm workflow that takes the dat.train_file that is created from the
-##'              nc2dat.train function and generates "lag.days" and "next.days". These variables pass along information
-##'              of the previous time step and provides a preview of the next time step. After these variables are created,
-##'              the models are generated by calling the tdm_temporal_downscale_functions.R scripts and these models
-##'              and betas are saved separately. Please note that these models and betas require a significant
-##'              amount of space. The storage required varies by the size of the training dataset, but prepare for
-##'              >100 GB. These will be called later in tdm_predict_subdaily_met to perform the linear regression
-##'              analysis.
-# -----------------------------------
-# Parameters
-# -----------------------------------
-##' @param outfolder - directory where models will be stored *** storage required varies by size of training dataset, but prepare for >10 GB
-##' @param path.train - path to CF/PEcAn style training data where each year is in a separate file.
-##' @param yrs.train - which years of the training data should be used for to generate the model for 
-##'                    the subdaily cycle.  If NULL, will default to all years
-##' @param direction.filter - Whether the model will be filtered backward or forward in time. options = c("backward", "forward")
-##'                           (PalEON will go backward, anybody interested in the future will go forward)                  
-##' @param in.prefix 
-##' @param n.beta - number of betas to save from linear regression model
-##' @param resids - logical stating whether to pass on residual data or not (this increases both memory & storage requirements)
-##' @param parallel - logical stating whether to run temporal_downscale_functions.R in parallel 
-##' @param n.cores - deals with parallelization
-##' @param day.window - integer specifying number of days around the day being modeled you want to use data from for that 
-##'                     specific hours coefficients. Must be integer because we want statistics from the same time of day
-##'                     for each day surrounding the model day
-##' @param seed - seed for randomization to allow for reproducible results                    
-##' @param overwrite logical: replace output file if it already exists?
-##' @param verbose logical, currently ignored
-##' @param print.progress - print progress bar? (gets passed through)
-##' @export
-# -----------------------------------
-#----------------------------------------------------------------------
-# Begin Function
-#----------------------------------------------------------------------
-
-
+#' Generate Subdaily Models
+#'
+#' Create statistical models to predict subdaily meteorology.
+#' This is the 2nd function in the tdm workflow that takes the dat.train_file that is created from the
+#'              nc2dat.train function and generates "lag.days" and "next.days". These variables pass along information
+#'              of the previous time step and provides a preview of the next time step. After these variables are created,
+#'              the models are generated by calling the tdm_temporal_downscale_functions.R scripts and these models
+#'              and betas are saved separately. Please note that these models and betas require a significant
+#'              amount of space. The storage required varies by the size of the training dataset, but prepare for
+#'              >100 GB. These will be called later in tdm_predict_subdaily_met to perform the linear regression
+#'              analysis.
+#'
+#' @family tdm - Temporally Downscale Meteorology
+#' @author Christy Rollinson, James Simkins
+#'
+#' @param outfolder - directory where models will be stored *** storage required varies by size of training dataset, but prepare for >10 GB
+#' @param path.train - path to CF/PEcAn style training data where each year is in a separate file.
+#' @param yrs.train - which years of the training data should be used to generate the model for 
+#'                    the subdaily cycle.  If NULL, will default to all years
+#' @param direction.filter - Whether the model will be filtered backward or forward in time. options = c("backward", "forward")
+#'                           (PalEON will go backward, anybody interested in the future will go forward)                  
+#' @param in.prefix not used
+#' @param n.beta - number of betas to save from linear regression model
+#' @param resids - logical stating whether to pass on residual data or not (this increases both memory & storage requirements)
+#' @param parallel - logical stating whether to run temporal_downscale_functions.R in parallel 
+#' @param n.cores - deals with parallelization
+#' @param day.window - integer specifying number of days around the day being modeled you want to use data from for that 
+#'                     specific hours coefficients. Must be integer because we want statistics from the same time of day
+#'                     for each day surrounding the model day
+#' @param seed - seed for randomization to allow for reproducible results                    
+#' @param overwrite logical: replace output file if it already exists?
+#' @param verbose logical, currently ignored
+#' @param print.progress - print progress bar? (gets passed through)
+#' @export
+#'
 gen.subdaily.models <- function(outfolder, path.train, yrs.train, direction.filter="forward", in.prefix,  
     n.beta, day.window, seed=Sys.time(), resids = FALSE, parallel = FALSE, n.cores = NULL, overwrite = TRUE, 
     verbose = FALSE, print.progress=FALSE) {
diff --git a/modules/data.atmosphere/R/tdm_lm_ensemble_sims.R b/modules/data.atmosphere/R/tdm_lm_ensemble_sims.R
index d594666611c..5b7a04e07e5 100644
--- a/modules/data.atmosphere/R/tdm_lm_ensemble_sims.R
+++ b/modules/data.atmosphere/R/tdm_lm_ensemble_sims.R
@@ -19,6 +19,7 @@
 ##' @param path.model - path to where the training model & betas is stored
 ##' @param direction.filter - Whether the model will be filtered backward or forward in time. options = c("backward", "forward")
 ##'                           (PalEON will go backward, anybody interested in the future will go forward)
+##' @param lags.list - optional list form of lags.init, with one entry for each unique `ens.day` in dat.mod
 ##' @param lags.init - a data frame of initialization parameters to match the data in dat.mod
 ##' @param dat.train - the training data used to fit the model; needed for night/day in
 ##'                    surface_downwelling_shortwave_flux_in_air
diff --git a/modules/data.atmosphere/R/tdm_model_train.R b/modules/data.atmosphere/R/tdm_model_train.R
index 0cff73e411d..03a8060cd82 100644
--- a/modules/data.atmosphere/R/tdm_model_train.R
+++ b/modules/data.atmosphere/R/tdm_model_train.R
@@ -14,10 +14,13 @@
 # Parameters
 # -----------------------------------
 ##' @param dat.subset data.frame containing lags, next, and downscale period data
+##' @param v variable name, as character
 ##' @param n.beta number of betas to pull from
 ##' @param resids TRUE or FALSE, whether to use residuals or not
 ##' @param threshold NULL except for surface_downwelling_shortwave_radiation, helps with our
 ##'                  distinction between day and night (no shortwave without sunlight)
+##' @param ... further arguments, currently ignored
+##'
 ##' @export
 # -----------------------------------
 #----------------------------------------------------------------------
diff --git a/modules/data.atmosphere/R/tdm_predict_subdaily_met.R b/modules/data.atmosphere/R/tdm_predict_subdaily_met.R
index 825bd203672..3e4526d8b50 100644
--- a/modules/data.atmosphere/R/tdm_predict_subdaily_met.R
+++ b/modules/data.atmosphere/R/tdm_predict_subdaily_met.R
@@ -37,6 +37,8 @@
 ##' @param verbose logical: should \code{\link[ncdf4:ncdf4-package]{ncdf4}} functions print debugging information as they run?
 ##' @param print.progress - print the progress bar?
 ##' @param seed - manually set seed for results to be reproducible
+##' @param ... further arguments, currently ignored
+##'
 ##' @export
 ##' @examples
 ##' \dontrun{
diff --git a/modules/data.atmosphere/R/tdm_temporal_downscale_functions.R b/modules/data.atmosphere/R/tdm_temporal_downscale_functions.R
index 911b1ba4014..76331439a08 100644
--- a/modules/data.atmosphere/R/tdm_temporal_downscale_functions.R
+++ b/modules/data.atmosphere/R/tdm_temporal_downscale_functions.R
@@ -28,6 +28,8 @@
 ##' @param seed - allows this to be reproducible
 ##' @param outfolder = where the output should be stored
 ##' @param print.progress - print progress of model generation?
+##' @param ... further arguments, currently ignored
+##'
 ##' @export
 # -----------------------------------
 #----------------------------------------------------------------------
diff --git a/modules/data.atmosphere/man/closest_xy.Rd b/modules/data.atmosphere/man/closest_xy.Rd
index 1b8e3c17db3..b767af5b5d0 100644
--- a/modules/data.atmosphere/man/closest_xy.Rd
+++ b/modules/data.atmosphere/man/closest_xy.Rd
@@ -2,10 +2,19 @@
 % Please edit documentation in R/closest_xy.R
 \name{closest_xy}
 \alias{closest_xy}
-\title{closest_xy}
+\title{Given latitude and longitude coordinates, find NARR x and y indices}
 \usage{
 closest_xy(slat, slon, infolder, infile)
 }
+\arguments{
+\item{slat, slon}{site location, in decimal degrees}
+
+\item{infolder}{path to folder containing infile}
+
+\item{infile}{pattern to match for filename inside infolder.
+Only the first file matching this pattern AND ending with '.nc'
+will be used}
+}
 \description{
 Given latitude and longitude coordinates, find NARR x and y indices
 }
diff --git a/modules/data.atmosphere/man/daygroup.Rd b/modules/data.atmosphere/man/daygroup.Rd
deleted file mode 100644
index 10dab9e98b6..00000000000
--- a/modules/data.atmosphere/man/daygroup.Rd
+++ /dev/null
@@ -1,11 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/download.NARR_site.R
-\name{daygroup}
-\alias{daygroup}
-\title{Assign daygroup tag for a given date}
-\usage{
-daygroup(date, flx)
-}
-\description{
-Assign daygroup tag for a given date
-}
diff --git a/modules/data.atmosphere/man/download.Ameriflux.Rd b/modules/data.atmosphere/man/download.Ameriflux.Rd
index 4bb914bcaf1..15091897fc2 100644
--- a/modules/data.atmosphere/man/download.Ameriflux.Rd
+++ b/modules/data.atmosphere/man/download.Ameriflux.Rd
@@ -27,6 +27,8 @@ The 'SITE_ID' field in \href{http://ameriflux.lbl.gov/sites/site-list-and-pages/
 \item{overwrite}{should existing files be overwritten}
 
 \item{verbose}{should the function be very verbose}
+
+\item{...}{further arguments, currently ignored}
 }
 \description{
 Download Ameriflux L2 netCDF files
diff --git a/modules/data.atmosphere/man/download.AmerifluxLBL.Rd b/modules/data.atmosphere/man/download.AmerifluxLBL.Rd
index 8b30000484f..9b8fa2290ad 100644
--- a/modules/data.atmosphere/man/download.AmerifluxLBL.Rd
+++ b/modules/data.atmosphere/man/download.AmerifluxLBL.Rd
@@ -42,6 +42,8 @@ The 'SITE_ID' field in \href{http://ameriflux.lbl.gov/sites/site-list-and-pages/
 \item{data_product}{AmeriFlux data product}
 
 \item{data_policy}{Two possible licenses (based on the site): 'CCBY4.0' or 'LEGACY'}
+
+\item{...}{further arguments, currently ignored}
 }
 \description{
 download.AmerifluxLBL. Function uses amf_download_base function from amerifluxr package
diff --git a/modules/data.atmosphere/man/download.ERA5.old.Rd b/modules/data.atmosphere/man/download.ERA5.old.Rd
index d065a061cec..64199c95ae1 100644
--- a/modules/data.atmosphere/man/download.ERA5.old.Rd
+++ b/modules/data.atmosphere/man/download.ERA5.old.Rd
@@ -20,20 +20,20 @@ download.ERA5.old(
 \item{outfolder}{Directory where results should be written}
 
 \item{start_date, end_date}{Range of years to retrieve. Format is
-`YYYY-MM-DD`.}
+\code{YYYY-MM-DD}.}
 
 \item{lat.in, lon.in}{Site coordinates, decimal degrees (numeric)}
 
-\item{product_types}{Character vector of product types, or `"all"`.
-Must be one or more of: `"reanalysis"`, `"ensemble members"`,
-`"ensemble mean"`, `"ensemble spread"`}
+\item{product_types}{Character vector of product types, or \code{"all"}.
+Must be one or more of: \code{"reanalysis"}, \code{"ensemble members"},
+\code{"ensemble mean"}, \code{"ensemble spread"}}
 
-\item{overwrite}{Logical. If `FALSE` (default), skip any files with
+\item{overwrite}{Logical. If \code{FALSE} (default), skip any files with
 the same target name (i.e. same variable) that already exist in
-`outfolder`. If `TRUE`, silently overwrite existing files.}
+\code{outfolder}. If \code{TRUE}, silently overwrite existing files.}
 
-\item{reticulate_python}{Path to Python binary for `reticulate`
-(passed to [reticulate::use_python()]). If `NULL` (default), use
+\item{reticulate_python}{Path to Python binary for \code{reticulate}
+(passed to \code{\link[reticulate:use_python]{reticulate::use_python()}}). If \code{NULL} (default), use
 the system default.}
 
 \item{...}{Currently unused. Allows soaking up additional arguments
@@ -41,21 +41,21 @@ to other methods.}
 }
 \value{
 Character vector of file names containing raw, downloaded
-  data (invisibly)
+data (invisibly)
 }
 \description{
-Link to [full data documentation](https://confluence.ecmwf.int/display/CKB/ERA5+data+documentation).
+Link to \href{https://confluence.ecmwf.int/display/CKB/ERA5+data+documentation}{full data documentation}.
 }
 \details{
-Under the hood, this function uses the Python `cdsapi` module,
-which can be installed via `pip` (`pip install --user cdsapi`). The
-module is accessed via the `reticulate` package.
+Under the hood, this function uses the Python \code{cdsapi} module,
+which can be installed via \code{pip} (\verb{pip install --user cdsapi}). The
+module is accessed via the \code{reticulate} package.
 
 Using the CDS API requires you to create a free account at
 https://cds.climate.copernicus.eu. Once you have done that, you
 will need to configure the CDS API on your local machine by
-creating a `${HOME}/.cdsapi` file, as described
-[here](https://cds.climate.copernicus.eu/api-how-to#install-the-cds-api-key).
+creating a \verb{$\{HOME\}/.cdsapi} file, as described
+\href{https://cds.climate.copernicus.eu/api-how-to#install-the-cds-api-key}{here}.
 }
 \examples{
 \dontrun{
diff --git a/modules/data.atmosphere/man/download.Fluxnet2015.Rd b/modules/data.atmosphere/man/download.Fluxnet2015.Rd
index 80f1e105dfc..72c7f309ee4 100644
--- a/modules/data.atmosphere/man/download.Fluxnet2015.Rd
+++ b/modules/data.atmosphere/man/download.Fluxnet2015.Rd
@@ -28,6 +28,10 @@ The 'SITE_ID' field in \href{https://fluxnet.org/sites/site-list-and-pages/}{lis
 \item{overwrite}{should existing files be overwritten}
 
 \item{verbose}{should the function be very verbose}
+
+\item{username}{login name for Ameriflux}
+
+\item{...}{further arguments, currently ignored}
 }
 \description{
 Download Fluxnet 2015 CSV files
diff --git a/modules/data.atmosphere/man/download.FluxnetLaThuile.Rd b/modules/data.atmosphere/man/download.FluxnetLaThuile.Rd
index d403f5ea0a2..218365ee2e7 100644
--- a/modules/data.atmosphere/man/download.FluxnetLaThuile.Rd
+++ b/modules/data.atmosphere/man/download.FluxnetLaThuile.Rd
@@ -30,6 +30,8 @@ The 'SITE_ID' field in \href{http://www.fluxdata.org/DataInfo/Dataset\%20Doc\%20
 \item{verbose}{should the function be very verbose}
 
 \item{username}{should be the registered Fluxnet username, else defaults to pecan}
+
+\item{...}{further arguments, currently ignored}
 }
 \description{
 Download Flxunet LaThuile CSV files
diff --git a/modules/data.atmosphere/man/download.GFDL.Rd b/modules/data.atmosphere/man/download.GFDL.Rd
index c1e5368e525..36f3260e0ef 100644
--- a/modules/data.atmosphere/man/download.GFDL.Rd
+++ b/modules/data.atmosphere/man/download.GFDL.Rd
@@ -40,6 +40,8 @@ the same name already exists?}
 \item{scenario}{Which scenario to run (options are rcp26, rcp45, rcp60, rcp85)}
 
 \item{ensemble_member}{Which ensemble_member to initialize the run (options are r1i1p1, r3i1p1, r5i1p1)}
+
+\item{...}{further arguments, currently ignored}
 }
 \description{
 Download GFDL CMIP5 outputs for a single grid point using OPeNDAP and convert to CF
diff --git a/modules/data.atmosphere/man/download.NARR_site.Rd b/modules/data.atmosphere/man/download.NARR_site.Rd
index d310c6a2060..0cbdf407772 100644
--- a/modules/data.atmosphere/man/download.NARR_site.Rd
+++ b/modules/data.atmosphere/man/download.NARR_site.Rd
@@ -33,10 +33,15 @@ download.NARR_site(
 
 \item{verbose}{Turn on verbose output? Default=FALSE}
 
+\item{progress}{Whether or not to show a progress bar.
+Requires the `progress` package to be installed.}
+
 \item{parallel}{Download in parallel? Default = TRUE}
 
 \item{ncores}{Number of cores for parallel download. Default is
 `parallel::detectCores()`}
+
+\item{...}{further arguments, currently ignored}
 }
 \description{
 Download NARR time series for a single site
diff --git a/modules/data.atmosphere/man/download.NEONmet.Rd b/modules/data.atmosphere/man/download.NEONmet.Rd
index bf9708a9b7c..c8ffa061ae6 100644
--- a/modules/data.atmosphere/man/download.NEONmet.Rd
+++ b/modules/data.atmosphere/man/download.NEONmet.Rd
@@ -27,6 +27,8 @@ The 4-letter SITE code  in \href{https://www.neonscience.org/science-design/fiel
 \item{overwrite}{should existing files be overwritten}
 
 \item{verbose}{makes the function output more text}
+
+\item{...}{further arguments, currently ignored}
 }
 \description{
 download.NEONmet
diff --git a/modules/data.atmosphere/man/extract.nc.ERA5.Rd b/modules/data.atmosphere/man/extract.nc.ERA5.Rd
index 7dd24354abd..d377f131bd4 100644
--- a/modules/data.atmosphere/man/extract.nc.ERA5.Rd
+++ b/modules/data.atmosphere/man/extract.nc.ERA5.Rd
@@ -32,11 +32,13 @@ extract.nc.ERA5(
 
 \item{outfolder}{Path to directory where nc files need to be saved.}
 
-\item{in.prefix}{initial portion of the filename that does not vary by date. Does not include directory; specify that as part of in.path.}
+\item{in.prefix}{initial portion of the filename that does not vary by date.
+Does not include directory; specify that as part of in.path.}
 
 \item{newsite}{site name.}
 
-\item{vars}{variables to be extracted. If NULL all the variables will be returned.}
+\item{vars}{variables to be extracted. If NULL all the variables will be
+returned.}
 
 \item{overwrite}{Logical if files needs to be overwritten.}
 
@@ -51,7 +53,8 @@ a list of xts objects with all the variables for the requested years
 ERA5_extract
 }
 \details{
-For the list of variables check out the documentation at \link{https://confluence.ecmwf.int/display/CKB/ERA5+data+documentation#ERA5datadocumentation-Spatialgrid}
+For the list of variables check out the documentation at \url{
+ https://confluence.ecmwf.int/display/CKB/ERA5+data+documentation#ERA5datadocumentation-Spatialgrid}
 }
 \examples{
 \dontrun{
diff --git a/modules/data.atmosphere/man/extract.nc.Rd b/modules/data.atmosphere/man/extract.nc.Rd
index 8fe057801f0..ef9a4868698 100644
--- a/modules/data.atmosphere/man/extract.nc.Rd
+++ b/modules/data.atmosphere/man/extract.nc.Rd
@@ -35,6 +35,8 @@ extract.nc(
 \item{overwrite}{should existing files be overwritten}
 
 \item{verbose}{should ouput of function be extra verbose}
+
+\item{...}{further arguments, currently ignored}
 }
 \description{
 Given latitude and longitude coordinates, extract site data from NARR file
diff --git a/modules/data.atmosphere/man/gen.subdaily.models.Rd b/modules/data.atmosphere/man/gen.subdaily.models.Rd
index 872183e9514..3c77bc657c9 100644
--- a/modules/data.atmosphere/man/gen.subdaily.models.Rd
+++ b/modules/data.atmosphere/man/gen.subdaily.models.Rd
@@ -2,7 +2,7 @@
 % Please edit documentation in R/tdm_generate_subdaily_models.R
 \name{gen.subdaily.models}
 \alias{gen.subdaily.models}
-\title{gen.subdaily.models}
+\title{Generate Subdaily Models}
 \usage{
 gen.subdaily.models(
   outfolder,
@@ -32,7 +32,7 @@ the subdaily cycle.  If NULL, will default to all years}
 \item{direction.filter}{- Whether the model will be filtered backward or forward in time. options = c("backward", "forward")
 (PalEON will go backward, anybody interested in the future will go forward)}
 
-\item{in.prefix}{}
+\item{in.prefix}{not used}
 
 \item{n.beta}{- number of betas to save from linear regression model}
 
@@ -55,6 +55,7 @@ for each day surrounding the model day}
 \item{print.progress}{- print progress bar? (gets passed through)}
 }
 \description{
+Create statistical models to predict subdaily meteorology.
 This is the 2nd function in the tdm workflow that takes the dat.train_file that is created from the
              nc2dat.train function and generates "lag.days" and "next.days". These variables pass along information
              of the previous time step and provides a preview of the next time step. After these variables are created,
@@ -64,10 +65,6 @@ This is the 2nd function in the tdm workflow that takes the dat.train_file that
              >100 GB. These will be called later in tdm_predict_subdaily_met to perform the linear regression
              analysis.
 }
-\details{
-Generate Subdaily Models
-Create statistical models to predict subdaily meteorology
-}
 \seealso{
 Other tdm - Temporally Downscale Meteorology: 
 \code{\link{lm_ensemble_sims}()},
diff --git a/modules/data.atmosphere/man/lightME.Rd b/modules/data.atmosphere/man/lightME.Rd
index 8bddbd3eeab..ad9042e79fc 100644
--- a/modules/data.atmosphere/man/lightME.Rd
+++ b/modules/data.atmosphere/man/lightME.Rd
@@ -21,7 +21,7 @@ lightME(lat = 40, DOY = 190, t.d = 12, t.sn = 12, atm.P = 1e+05, alpha = 0.85)
 }
 \value{
 a \code{\link{list}} structure with components:
-\itemize{
+\describe{
  \item{'I.dir'}{Direct radiation (\eqn{\mu} mol \eqn{m^{-2}s^{-1}}}
  \item{'I.diff'}{Indirect (diffuse) radiation (\eqn{\mu} mol\eqn{m^{-2}s^{-1}}}
  \item{'cos.th'}{cosine of \eqn{\theta}, solar zenith angle.}
diff --git a/modules/data.atmosphere/man/lm_ensemble_sims.Rd b/modules/data.atmosphere/man/lm_ensemble_sims.Rd
index 72fc86a8fe6..18e9841e98f 100644
--- a/modules/data.atmosphere/man/lm_ensemble_sims.Rd
+++ b/modules/data.atmosphere/man/lm_ensemble_sims.Rd
@@ -30,6 +30,8 @@ lm_ensemble_sims(
 \item{direction.filter}{- Whether the model will be filtered backward or forward in time. options = c("backward", "forward")
 (PalEON will go backward, anybody interested in the future will go forward)}
 
+\item{lags.list}{- optional list form of lags.init, with one entry for each unique `ens.day` in dat.mod}
+
 \item{lags.init}{- a data frame of initialization parameters to match the data in dat.mod}
 
 \item{dat.train}{- the training data used to fit the model; needed for night/day in
diff --git a/modules/data.atmosphere/man/merge_met_variable.Rd b/modules/data.atmosphere/man/merge_met_variable.Rd
index c310eb7689b..66821bf6bb7 100644
--- a/modules/data.atmosphere/man/merge_met_variable.Rd
+++ b/modules/data.atmosphere/man/merge_met_variable.Rd
@@ -20,9 +20,7 @@ merge_met_variable(
 
 \item{in.prefix}{prefix of original data}
 
-\item{start_date}{}
-
-\item{end_date}{}
+\item{start_date, end_date}{date (or character in a standard date format). Only year component is used.}
 
 \item{merge.file}{path of file to be merged in}
 
@@ -31,15 +29,12 @@ merge_met_variable(
 \item{verbose}{logical: should \code{\link[ncdf4:ncdf4-package]{ncdf4}} functions
 print debugging information as they run?}
 
-\item{...}{}
+\item{...}{other arguments, currently ignored}
 }
 \value{
 Currently nothing. TODO: Return a data frame summarizing the merged files.
 }
 \description{
-Merge a new met variable from an external file (e.g. CO2) into existing met files
-}
-\details{
 Currently modifies the files IN PLACE rather than creating a new copy of the files an a new DB record. 
 Currently unit and name checking only implemented for CO2. 
 Currently does not yet support merge data that has lat/lon
diff --git a/modules/data.atmosphere/man/met.process.Rd b/modules/data.atmosphere/man/met.process.Rd
index 7aee712c518..d2a1641da01 100644
--- a/modules/data.atmosphere/man/met.process.Rd
+++ b/modules/data.atmosphere/man/met.process.Rd
@@ -35,6 +35,9 @@ met.process(
 
 \item{dir}{directory to write outputs to}
 
+\item{browndog}{login info for the Browndog conversion service, if used.
+List of `url`, `username`, `password`}
+
 \item{spin}{spin-up settings passed to model-specific met2model. List containing nyear (number of years of spin-up), nsample (first n years to cycle), and resample (TRUE/FALSE)}
 
 \item{overwrite}{Whether to force met.process to proceed.
diff --git a/modules/data.atmosphere/man/met2CF.ALMA.Rd b/modules/data.atmosphere/man/met2CF.ALMA.Rd
index fde57db76b3..88481bd6e1d 100644
--- a/modules/data.atmosphere/man/met2CF.ALMA.Rd
+++ b/modules/data.atmosphere/man/met2CF.ALMA.Rd
@@ -26,6 +26,8 @@ met2CF.ALMA(
 \item{end_date}{the end date of the data to be downloaded (will only use the year part of the date)}
 
 \item{overwrite}{should existing files be overwritten}
+
+\item{verbose}{logical: enable verbose mode for netcdf writer functions?}
 }
 \description{
 Get meteorology variables from ALMA netCDF files and convert to netCDF CF format
diff --git a/modules/data.atmosphere/man/met2CF.Ameriflux.Rd b/modules/data.atmosphere/man/met2CF.Ameriflux.Rd
index 0e054519cef..96bbf1c72ca 100644
--- a/modules/data.atmosphere/man/met2CF.Ameriflux.Rd
+++ b/modules/data.atmosphere/man/met2CF.Ameriflux.Rd
@@ -29,6 +29,8 @@ met2CF.Ameriflux(
 \item{overwrite}{should existing files be overwritten}
 
 \item{verbose}{should ouput of function be extra verbose}
+
+\item{...}{further arguments, currently ignored}
 }
 \description{
 Get meteorology variables from Ameriflux L2 netCDF files and convert to netCDF CF format
diff --git a/modules/data.atmosphere/man/met2CF.AmerifluxLBL.Rd b/modules/data.atmosphere/man/met2CF.AmerifluxLBL.Rd
index b37b4c2478f..9ea8eeeca6b 100644
--- a/modules/data.atmosphere/man/met2CF.AmerifluxLBL.Rd
+++ b/modules/data.atmosphere/man/met2CF.AmerifluxLBL.Rd
@@ -49,6 +49,8 @@ Units for datetime field are the lubridate function that will be used to parse t
 \item{overwrite}{should existing files be overwritten}
 
 \item{verbose}{should ouput of function be extra verbose}
+
+\item{...}{further arguments, currently ignored}
 }
 \description{
 Get meteorology variables from Ameriflux LBL and convert to netCDF CF format
diff --git a/modules/data.atmosphere/man/met2CF.PalEON.Rd b/modules/data.atmosphere/man/met2CF.PalEON.Rd
index 6879078c28a..565c14cfff0 100644
--- a/modules/data.atmosphere/man/met2CF.PalEON.Rd
+++ b/modules/data.atmosphere/man/met2CF.PalEON.Rd
@@ -28,7 +28,13 @@ met2CF.PalEON(
 
 \item{end_date}{the end date of the data to be downloaded (will only use the year part of the date)}
 
+\item{lat, lon}{site location in decimal degrees. Caution: both must have length one.}
+
 \item{overwrite}{should existing files be overwritten}
+
+\item{verbose}{logical: enable verbose mode for netcdf writer functions?}
+
+\item{...}{further arguments, currently ignored}
 }
 \description{
 Get meteorology variables from PalEON netCDF files and convert to netCDF CF format
diff --git a/modules/data.atmosphere/man/met2CF.PalEONregional.Rd b/modules/data.atmosphere/man/met2CF.PalEONregional.Rd
index 946032482ba..dcdad2904c3 100644
--- a/modules/data.atmosphere/man/met2CF.PalEONregional.Rd
+++ b/modules/data.atmosphere/man/met2CF.PalEONregional.Rd
@@ -27,6 +27,10 @@ met2CF.PalEONregional(
 \item{end_date}{the end date of the data to be downloaded (will only use the year part of the date)}
 
 \item{overwrite}{should existing files be overwritten}
+
+\item{verbose}{logical: enable verbose mode for netcdf writer functions?}
+
+\item{...}{further arguments, currently ignored}
 }
 \description{
 Get meteorology variables from PalEON netCDF files and convert to netCDF CF format
diff --git a/modules/data.atmosphere/man/metgapfill.NOAA_GEFS.Rd b/modules/data.atmosphere/man/metgapfill.NOAA_GEFS.Rd
index 196a6394990..c03113329a8 100644
--- a/modules/data.atmosphere/man/metgapfill.NOAA_GEFS.Rd
+++ b/modules/data.atmosphere/man/metgapfill.NOAA_GEFS.Rd
@@ -29,17 +29,15 @@ metgapfill.NOAA_GEFS(
 \item{overwrite}{Whether or not to overwrite the output file if it exists or not}
 
 \item{verbose}{Passed to nc writing functions for additional output}
+
+\item{...}{further arguments, currently ignored}
 }
 \description{
-Gapfill NOAA_GEFS weather data
-}
-\section{Purpose}{
-
-This function uses simple methods to gapfill NOAA GEFS met data
-Temperature and Precipitation are gapfilled with spline; other data sources are gapfilled with
-using linear models fitted to other fitted data.
+This function uses simple methods to gapfill NOAA GEFS met data.
+Temperature and Precipitation are gapfilled with splines;
+ other data sources are gapfilled using linear models fitted to
+ other fitted data.
 }
-
 \author{
 Luke Dramko
 }
diff --git a/modules/data.atmosphere/man/metgapfill.Rd b/modules/data.atmosphere/man/metgapfill.Rd
index 5c96abb1525..92507769909 100644
--- a/modules/data.atmosphere/man/metgapfill.Rd
+++ b/modules/data.atmosphere/man/metgapfill.Rd
@@ -36,6 +36,8 @@ metgapfill(
 \item{overwrite}{should existing files be overwritten}
 
 \item{verbose}{should the function be very verbose}
+
+\item{...}{further arguments, currently ignored}
 }
 \description{
 Take an Ameriflux NetCDF file
diff --git a/modules/data.atmosphere/man/model.train.Rd b/modules/data.atmosphere/man/model.train.Rd
index eea0a3fec51..47e760df95b 100644
--- a/modules/data.atmosphere/man/model.train.Rd
+++ b/modules/data.atmosphere/man/model.train.Rd
@@ -9,12 +9,16 @@ model.train(dat.subset, v, n.beta, resids = resids, threshold = NULL, ...)
 \arguments{
 \item{dat.subset}{data.frame containing lags, next, and downscale period data}
 
+\item{v}{variable name, as character}
+
 \item{n.beta}{number of betas to pull from}
 
 \item{resids}{TRUE or FALSE, whether to use residuals or not}
 
 \item{threshold}{NULL except for surface_downwelling_shortwave_radiation, helps with our
 distinction between day and night (no shortwave without sunlight)}
+
+\item{...}{further arguments, currently ignored}
 }
 \description{
 Function to create linear regression models for specific met
diff --git a/modules/data.atmosphere/man/nc.merge.Rd b/modules/data.atmosphere/man/nc.merge.Rd
index 75329f45c33..dbed3d19330 100644
--- a/modules/data.atmosphere/man/nc.merge.Rd
+++ b/modules/data.atmosphere/man/nc.merge.Rd
@@ -33,6 +33,8 @@ nc.merge(
 
 \item{verbose}{logical: should \code{\link[ncdf4:ncdf4-package]{ncdf4}}
 functions print debugging information as they run?}
+
+\item{...}{further arguments, currently ignored}
 }
 \description{
 This is the 1st function for the tdm (Temporally Downscale Meteorology) workflow. The nc2dat.train function
diff --git a/modules/data.atmosphere/man/permute.nc.Rd b/modules/data.atmosphere/man/permute.nc.Rd
index eeafd16fb10..6b276dbdf22 100644
--- a/modules/data.atmosphere/man/permute.nc.Rd
+++ b/modules/data.atmosphere/man/permute.nc.Rd
@@ -29,6 +29,8 @@ permute.nc(
 \item{overwrite}{should existing files be overwritten}
 
 \item{verbose}{should ouput of function be extra verbose}
+
+\item{...}{further arguments, currently ignored}
 }
 \description{
 Permute netCDF files
diff --git a/modules/data.atmosphere/man/predict_subdaily_met.Rd b/modules/data.atmosphere/man/predict_subdaily_met.Rd
index 09a98b8ce0c..18453131757 100644
--- a/modules/data.atmosphere/man/predict_subdaily_met.Rd
+++ b/modules/data.atmosphere/man/predict_subdaily_met.Rd
@@ -60,6 +60,8 @@ ensemble rather than overwriting with a default naming scheme}
 \item{seed}{- manually set seed for results to be reproducible}
 
 \item{print.progress}{- print the progress bar?}
+
+\item{...}{further arguments, currently ignored}
 }
 \description{
 This is the main function of the tdm family workflow. This function predicts subdaily meteorology
diff --git a/modules/data.atmosphere/man/split_wind.Rd b/modules/data.atmosphere/man/split_wind.Rd
index 21482185528..02747a03110 100644
--- a/modules/data.atmosphere/man/split_wind.Rd
+++ b/modules/data.atmosphere/man/split_wind.Rd
@@ -19,9 +19,7 @@ split_wind(
 
 \item{in.prefix}{prefix of original data}
 
-\item{start_date}{}
-
-\item{end_date}{}
+\item{start_date, end_date}{date (or character in a standard date format). Only year component is used.}
 
 \item{overwrite}{logical: replace output file if it already exists?}
 
@@ -33,9 +31,6 @@ split_wind(
 nothing. TODO: Return data frame summarizing results
 }
 \description{
-Split wind_speed into eastward_wind and northward_wind
-}
-\details{
 Currently modifies the files IN PLACE rather than creating a new copy of the files an a new DB record.
 }
 \examples{
diff --git a/modules/data.atmosphere/man/temporal.downscale.functions.Rd b/modules/data.atmosphere/man/temporal.downscale.functions.Rd
index 668fe14e60a..654fc66d6d4 100644
--- a/modules/data.atmosphere/man/temporal.downscale.functions.Rd
+++ b/modules/data.atmosphere/man/temporal.downscale.functions.Rd
@@ -37,6 +37,8 @@ still being worked on, set to FALSE}
 \item{outfolder}{= where the output should be stored}
 
 \item{print.progress}{- print progress of model generation?}
+
+\item{...}{further arguments, currently ignored}
 }
 \description{
 This function contains the functions that do the heavy lifting in gen.subdaily.models()
diff --git a/modules/data.atmosphere/tests/Rcheck_reference.log b/modules/data.atmosphere/tests/Rcheck_reference.log
index 855436fb1b1..e68a8cf8073 100644
--- a/modules/data.atmosphere/tests/Rcheck_reference.log
+++ b/modules/data.atmosphere/tests/Rcheck_reference.log
@@ -24,17 +24,13 @@ use conditionally.
 * checking installed package size ... OK
 * checking package directory ... OK
 * checking for future file timestamps ... OK
-* checking DESCRIPTION meta-information ... OK
+* checking DESCRIPTION meta-information ... NOTE
+License stub is invalid DCF.
 * checking top-level files ... OK
 * checking for left-over files ... OK
 * checking index information ... OK
 * checking package subdirectories ... OK
-* checking R files for non-ASCII characters ... WARNING
-Found the following file with non-ASCII characters:
-  download.PalEON.R
-Portable packages must use only ASCII characters in their R code,
-except perhaps in comments.
-Use \uxxxx escapes for other characters.
+* checking R files for non-ASCII characters ... OK
 * checking R files for syntax errors ... OK
 * checking whether the package can be loaded ... OK
 * checking whether the package can be loaded with stated dependencies ... OK
@@ -63,11 +59,7 @@ Undefined global functions or variables:
 * checking Rd files ... OK
 * checking Rd metadata ... OK
 * checking Rd line widths ... OK
-* checking Rd cross-references ... WARNING
-Missing link or links in documentation object 'extract.nc.ERA5.Rd':
-  ‘https://confluence.ecmwf.int/display/CKB/ERA5+data+documentation#ERA5datadocumentation-Spatialgrid’
-
-See section 'Cross-references' in the 'Writing R Extensions' manual.
+* checking Rd cross-references ... OK
 * checking for missing documentation entries ... WARNING
 Undocumented data sets:
   ‘FLUXNET.sitemap’ ‘cruncep_landmask’ ‘cruncep’ ‘ebifarm’ ‘narr’
@@ -76,94 +68,8 @@ All user-level objects in a package should have documentation entries.
 See chapter ‘Writing R documentation files’ in the ‘Writing R
 Extensions’ manual.
 * checking for code/documentation mismatches ... OK
-* checking Rd \usage sections ... WARNING
-Undocumented arguments in documentation object 'closest_xy'
-  ‘slat’ ‘slon’ ‘infolder’ ‘infile’
-
-Undocumented arguments in documentation object 'daygroup'
-  ‘date’ ‘flx’
-
-Undocumented arguments in documentation object 'download.Ameriflux'
-  ‘...’
-
-Undocumented arguments in documentation object 'download.AmerifluxLBL'
-  ‘...’
-
-Undocumented arguments in documentation object 'download.Fluxnet2015'
-  ‘username’ ‘...’
-
-Undocumented arguments in documentation object 'download.FluxnetLaThuile'
-  ‘...’
-
-Undocumented arguments in documentation object 'download.GFDL'
-  ‘...’
-
-Undocumented arguments in documentation object 'download.NARR_site'
-  ‘progress’ ‘...’
-
-Undocumented arguments in documentation object 'download.NEONmet'
-  ‘...’
-
-Undocumented arguments in documentation object 'extract.nc'
-  ‘...’
-
-Undocumented arguments in documentation object 'lm_ensemble_sims'
-  ‘lags.list’
-
-Undocumented arguments in documentation object 'met.process'
-  ‘browndog’
-
-Undocumented arguments in documentation object 'met2CF.ALMA'
-  ‘verbose’
-
-Undocumented arguments in documentation object 'met2CF.Ameriflux'
-  ‘...’
-
-Undocumented arguments in documentation object 'met2CF.AmerifluxLBL'
-  ‘...’
-
-Undocumented arguments in documentation object 'met2CF.PalEON'
-  ‘lat’ ‘lon’ ‘verbose’ ‘...’
-
-Undocumented arguments in documentation object 'met2CF.PalEONregional'
-  ‘verbose’ ‘...’
-
-Undocumented arguments in documentation object 'metgapfill.NOAA_GEFS'
-  ‘...’
-
-Undocumented arguments in documentation object 'metgapfill'
-  ‘...’
-
-Undocumented arguments in documentation object 'model.train'
-  ‘v’ ‘...’
-
-Undocumented arguments in documentation object 'nc.merge'
-  ‘...’
-
-Undocumented arguments in documentation object 'permute.nc'
-  ‘...’
-
-Undocumented arguments in documentation object 'predict_subdaily_met'
-  ‘...’
-
-Undocumented arguments in documentation object 'temporal.downscale.functions'
-  ‘...’
-
-Functions with \usage entries need to have the appropriate \alias
-entries, and all their arguments documented.
-The \usage entries must correspond to syntactically valid R code.
-See chapter ‘Writing R documentation files’ in the ‘Writing R
-Extensions’ manual.
-* checking Rd contents ... WARNING
-Argument items with no description in Rd object 'gen.subdaily.models':
-  ‘in.prefix’
-
-Argument items with no description in Rd object 'merge_met_variable':
-  ‘start_date’ ‘end_date’ ‘...’
-
-Argument items with no description in Rd object 'split_wind':
-  ‘start_date’ ‘end_date’
-
+* checking Rd \usage sections ... OK
+* checking Rd contents ... OK
 * checking for unstated dependencies in examples ... OK
 * checking contents of ‘data’ directory ... OK
 * checking data for non-ASCII characters ... OK
@@ -174,11 +80,7 @@ Argument items with no description in Rd object 'split_wind':
                              old_size new_size compress
   cruncep_landmask.RData         39Kb      9Kb       xz
   narr_cruncep_ebifarm.RData    790Kb    597Kb       xz
-* checking files in ‘vignettes’ ... WARNING
-Files in the 'vignettes' directory but no files in 'inst/doc':
-  ‘ameriflux_demo.Rmd’, ‘cfmet_downscaling.Rmd’,
-    ‘compare_narr_cruncep_met.Rmd’, ‘tdm_downscaling.Rmd’
-Package has no Sweave vignette sources and no VignetteBuilder field.
+* checking files in ‘vignettes’ ... OK
 * checking examples ... OK
 * checking for unstated dependencies in ‘tests’ ... OK
 * checking tests ... OK
@@ -187,4 +89,4 @@ Package has no Sweave vignette sources and no VignetteBuilder field.
 * checking for non-standard things in the check directory ... OK
 * checking for detritus in the temp directory ... OK
 * DONE
-Status: 7 WARNINGs, 2 NOTEs
+Status: 2 WARNINGs, 3 NOTEs
diff --git a/modules/data.atmosphere/vignettes/ameriflux_demo.Rmd b/modules/data.atmosphere/vignettes/ameriflux_demo.Rmd
index 3d23e6e4a41..df568a83c15 100644
--- a/modules/data.atmosphere/vignettes/ameriflux_demo.Rmd
+++ b/modules/data.atmosphere/vignettes/ameriflux_demo.Rmd
@@ -2,7 +2,10 @@
 title: "PEcAn: Importing Met data from Bondville, IL Ameriflux station"
 author: "David LeBauer"
 date: "4/28/2015"
-output: html_document
+output: html_vignette
+vignette: >
+  %\VignetteIndexEntry{PEcAn: Importing Met data from Bondville, IL Ameriflux station}
+  %\VignetteEngine{knitr::rmarkdown}
 ---
 
 
@@ -19,7 +22,7 @@ The PEcAn.data.atmosphere source code is in [`modules/data.atmosphere`](https://
 ```{r}
 library(knitr)
 library(ggplot2)
-library(ggthemes)
+# library(ggthemes)
 library(PEcAn.data.atmosphere)
 
 ```
@@ -35,7 +38,7 @@ knitr::opts_chunk$set(message = FALSE, warnings = FALSE,  cache = FALSE,
 
 ## Download Ameriflux data for Bondville
 
-```{r download}
+```{r download,eval=FALSE}
 
 download.Ameriflux(sitename = "US-Bo1", outfolder = "/tmp/", 
                    start_date = "1996-01-01", end_date = "2008-04-10")
@@ -43,7 +46,7 @@ download.Ameriflux(sitename = "US-Bo1", outfolder = "/tmp/",
 
 ## Convert to PEcAn-CF format
 
-```{r met2cf}
+```{r met2cf, eval=FALSE}
 met2CF.Ameriflux(in.path = "/tmp/", in.prefix = "US-Bo1", outfolder = "/tmp/out/", 
                  start_date = "1996-01-01", end_date = "2008-04-10")
 
@@ -63,14 +66,14 @@ system("ncrcat -O -h /tmp/out/US-Bo1.199[6789].nc /tmp/out/US-Bo1.200[12348678].
 Using the `load.cfmet` convenience function. Ameriflux is provided at 30 min intervals. If needed at a finer resolution, see `?cfmet.downscale.time` (which works with subdaily and daily data). There is no `cfmet.upscale.time` function, but would be straightforward to implement if needed.
 
 
-```{r load-data}
+```{r load-data, eval=FALSE}
 
 bondville.nc <- nc_open("/tmp/out/US-Bo11996-2008.nc")
 bondville.cfmet <- load.cfmet(bondville.nc, lat = 40.0061988830566, lon = -88.290397644043, start.date = "1996-08-25", end.date = "2008-04-10")[!is.na(air_pressure)]
 ```
 
 
-```{r}
+```{r, eval=FALSE}
 
 theme_set(theme_tufte())
 p1 <- ggplot() + geom_line(data = bondville.cfmet, aes(x = date, y = surface_downwelling_shortwave_flux_in_air)) + ylab(paste(bondville.nc$var$surface_downwelling_shortwave_flux_in_air$longname, bondville.nc$var$surface_downwelling_shortwave_flux_in_air$units))
@@ -110,13 +113,13 @@ plots <- list(p1, p2, p3, p4, p5, p6, p7, p8, p9)
 
 ## Plot entire time series
 
-```{r long-time, echo=FALSE}
+```{r long-time, echo=FALSE, eval=FALSE}
 lapply(plots, print)
 ```
 
 ## Plot 8-26-1996 to 10-14-1996
 
-```{r two-months, echo=FALSE}
+```{r two-months, echo=FALSE, eval=FALSE}
 lapply(plots, function(x) x + xlim(ymd_hms(c("1996-08-26 18:29:00 UTC", "1996-10-14 18:29:00 UTC"))))
 ```
 
diff --git a/modules/data.atmosphere/vignettes/cfmet_downscaling.Rmd b/modules/data.atmosphere/vignettes/cfmet_downscaling.Rmd
index 1c958ee8ae2..ce0851a2cd6 100644
--- a/modules/data.atmosphere/vignettes/cfmet_downscaling.Rmd
+++ b/modules/data.atmosphere/vignettes/cfmet_downscaling.Rmd
@@ -1,3 +1,11 @@
+---
+title: "Met Downscaling"
+output: html_vignette
+vignette: >
+  %\VignetteIndexEntry{Met Downscaling}
+  %\VignetteEngine{knitr::rmarkdown}
+---
+
 Met Downscaling
 ===============
 
@@ -9,16 +17,16 @@ examples:
 * CRU-NCEP 6 hourly
 
 ### Extract 
-
-```{r}
+TODO: urbana_subdaily_test now lives in the test folder, not extdata
+```{r, eval=FALSE}
 library(PEcAn.data.atmosphere)
-subdaily.nc <- nc_open(system.file("extdata/urbana_subdaily_test.nc", package = "PEcAn.data.atmosphere"))
+subdaily.nc <- ncdf4::nc_open(system.file("extdata/urbana_subdaily_test.nc", package = "PEcAn.data.atmosphere"))
 subdaily.cf <- load.cfmet(met.nc = subdaily.nc, lat = 39.75, lon = -87.25, start.date = "1979-01-01", end.date = "1979-06-30")
 ```
 
 ### Downscale
 
-```{r}
+```{r, eval = FALSE}
 
 hourly.cf <- cfmet.downscale.time(cfmet = subdaily.cf)
 
diff --git a/modules/data.atmosphere/vignettes/compare_narr_cruncep_met.Rmd b/modules/data.atmosphere/vignettes/compare_narr_cruncep_met.Rmd
index 23487a535ee..efaeffb6f12 100644
--- a/modules/data.atmosphere/vignettes/compare_narr_cruncep_met.Rmd
+++ b/modules/data.atmosphere/vignettes/compare_narr_cruncep_met.Rmd
@@ -1,7 +1,17 @@
+---
+title: "Comparing met data from various sources"
+output: html_vignette
+vignette: >
+  %\VignetteIndexEntry{Comparing met data from various sources}
+  %\VignetteEngine{knitr::rmarkdown}
+---
 
 Comparing met data from various sources 
 ========================================================
 
+(All code chunks are set to eval=FALSE because vignette building was throwing errors.
+TODO: Debug and re-enable all chunks)
+
 ## Sources:
 
 * `ebifarm` local met station *Data*
@@ -30,9 +40,9 @@ Comparing 'data' (ebifarm) with gridded products
 TODO: clean up figure titles, labels, write explanations
 
 
-```{r loading-libraries}
+```{r loading-libraries, eval=FALSE}
 library(PEcAn.data.atmosphere)
-library(data.table)
+# library(data.table)
 library(ggplot2)
 theme_set(theme_bw())
 data(narr_cruncep_ebifarm)
@@ -47,7 +57,7 @@ These data are on biocluster.igb.illinois.edu, most 10-100s GB.
 Scripts used to download and convert these data to PEcAn CF format, optimized for time series extraction, are on [GitHub ebimodeling/model-drivers](https://github.com/ebimodeling/model-drivers).
 
 
-```sh
+```{sh, eval=FALSE}
 mkdir ~/testmet/
 ncks -O -d lon,-76.75,-76.25 -d lat,2.75,3.25 /home/groups/ebimodeling/met/narr/threehourly_32km/1979_2013.nc ~/testmet/narr32km_champaign.nc
 
@@ -101,7 +111,7 @@ narr3h$source <- "narr3h"
 ebifarm$source <- "ebifarm"
 ```
 
-```{r reorder-met}
+```{r reorder-met, eval=FALSE}
 met <- rbind(cruncep[,list(source, date, temp = DailyTemp.C, RH, wind = WindSpeed, precip, solar = solarR)],
              narr[,list(source, date, temp = Temp, RH, wind = WS, precip, solar = SolarR)],
              narr3h[,list(source, date, temp = DailyTemp.C, RH, wind = WindSpeed, precip, solar = solarR)],
@@ -114,7 +124,7 @@ met$source <- factor(met$source,
 ### Solar Radiation (PAR) vs Temp
 
 
-```{r  solar-v-temp}
+```{r  solar-v-temp, eval=FALSE}
 
 ggplot() + geom_point(data = met, aes(solar, temp, color = month(date)), alpha = 0.1) + 
   facet_wrap(~source, nrow=1) + 
@@ -127,7 +137,7 @@ ggplot() + geom_point(data = met, aes(solar, temp, color = month(date)), alpha =
 
 ### RH vs Temp
 
-```{r  RH-v-Temp}
+```{r  RH-v-Temp, eval=FALSE}
 
 ggplot() + geom_point(data = met, aes(RH, temp, color = month(date)), alpha = 0.1) + 
   facet_wrap(~source, nrow=1) + 
@@ -137,7 +147,7 @@ ggplot() + geom_point(data = met, aes(RH, temp, color = month(date)), alpha = 0.
 
 ### Solar Radiation and Precipitation: NARR daily vs 3 hourly
 
-```{r  par-v-precip}
+```{r  par-v-precip, eval=FALSE}
 
 ggplot() + geom_point(data = met[solar > 1 & precip > 0.1], aes(solar, precip, color = month(date)), alpha = 0.1) + 
   facet_wrap(~source, nrow=1) + 
@@ -148,7 +158,7 @@ ggplot() + geom_point(data = met[solar > 1 & precip > 0.1], aes(solar, precip, c
 
 ### Precipitation v Temperature 
 
-```{r  precip-v-temp}
+```{r  precip-v-temp, eval=FALSE}
 
 ggplot() + geom_point(data = met, aes(precip, temp, color = month(date)), alpha = 0.1) + 
   facet_wrap(~source, nrow=1) + 
@@ -160,7 +170,7 @@ ggplot() + geom_point(data = met, aes(precip, temp, color = month(date)), alpha
 
 ### Compare Solar Radiation
 
-```{r solar}
+```{r solar, eval=FALSE}
 s <- met[,list(date, day = yday(date), solar, source )]
 s <- s[,list(date = min(date), solar = max(solar)), by = 'day,source']
 
@@ -182,7 +192,7 @@ ggplot() + geom_point(data = met[month(date) >5 & month(date)<9 & solar > 100],
 
 ### Max Solar Radiation for June 1-Aug31 2010
 
-```{r  max-solar-plot}
+```{r  max-solar-plot, eval=FALSE}
 maxsolarplot <- ggplot() +
     geom_line(data = s, aes(date, solar, color = source)) +
     xlim(ymd("2010-06-01"), ymd("2010-08-31")) + ggtitle("Max Daily PAR")
@@ -192,7 +202,7 @@ print(maxsolarplot)
 ### Max Solar Radiation (PAR) Model v OBS
 
 
-```{r create-plots, fig.height = 3, fig.width = 12}
+```{r create-plots, fig.height = 3, fig.width = 12, eval=FALSE}
 maxsolar <- allsolar[,list(obs=max(obs),cruncep=max(cruncep), narr = max(narr), narr3h=max(narr3h), date = min(date)), by = day]
 
 narrsolar <- ggplot() + geom_point(data = maxsolar, aes(obs, narr, color = month(date)), alpha = 0.3)+ scale_color_gradientn(colours = c("Red", "Orange", "Yellow", "Green", "Blue"))+ geom_line(aes(0:2000, 0:2000)) + xlim(c(0,2100)) + ylim(c(0,2100))
@@ -218,7 +228,7 @@ gridExtra::grid.arrange(
 
 ### PAR residuals (model - obs)
 
-```{r solarresid-plot}
+```{r solarresid-plot, eval=FALSE}
 
 solarresiduals <- ggplot(data=allsolar[narr+obs>100]) +
     geom_point(aes(date, narr - obs), alpha = 0.1, color = "blue") +
@@ -237,7 +247,7 @@ print(solarresiduals)
 
 ### Correlations of daily max solar radiation
 
-```{r maxsolar-plot}
+```{r maxsolar-plot, eval = FALSE}
 library(GGally)
 ggpairs(maxsolar[,list(obs, narr3h, narr, cruncep)])
 ```
@@ -247,13 +257,13 @@ ggpairs(maxsolar[,list(obs, narr3h, narr, cruncep)])
 
 ### Compare daily and 3hourly downscaled NARR
 
-```{r}
+```{r, eval = FALSE}
 weachnarr_narr3h
 ```
 
 ### Multiple variables
 
-```{r all-vars-plots, fig.height = 15, fig.width = 10}
+```{r all-vars-plots, fig.height = 15, fig.width = 10, eval = FALSE}
 ### Generate some plots to compare August 
 
 rh <- ggplot() +
@@ -283,16 +293,16 @@ print(gridExtra::grid.arrange(rh, precip, temp, wind, solar, ncol = 1))
 
 
 * Temperature:
-```{r results='markup'}
+```{r results='markup', eval=FALSE}
 kable(met[,list(min = min(temp), mean = mean(temp), max = max(temp)), by = source])
 ```
 * RH
 
-```{r results='markup'} 
+```{r results='markup', eval=FALSE} 
 kable(met[,list(min = min(RH*100), mean = mean(RH*100), max = max(RH*100)), by = source])
 ```
 * Total Precip
-```{r results='markup'}
+```{r results='markup', eval = FALSE}
 kable(met[,list(total=sum(precip)), by = source])
 ```
 
@@ -301,7 +311,7 @@ kable(met[,list(total=sum(precip)), by = source])
 
 * need to print each one ...
 
-```{r more-plots, fig.height = 15, fig.width = 10}
+```{r more-plots, fig.height = 15, fig.width = 10, eval = FALSE}
 
 obs <- merge(met[!source == "ebifarm"], met[source == "ebifarm"], by = "date")
 obs$yday <- yday(obs$date)
diff --git a/modules/data.atmosphere/vignettes/tdm_downscaling.Rmd b/modules/data.atmosphere/vignettes/tdm_downscaling.Rmd
index 4b3624cd138..df283806fad 100644
--- a/modules/data.atmosphere/vignettes/tdm_downscaling.Rmd
+++ b/modules/data.atmosphere/vignettes/tdm_downscaling.Rmd
@@ -1,5 +1,10 @@
-Temporally Downscale Meteorology
-===============
+---
+title: "Temporally Downscale Meteorology"
+output: html_vignette
+vignette: >
+  %\VignetteIndexEntry{Temporally Downscale Meteorology}
+  %\VignetteEngine{knitr::rmarkdown}
+---
 
 
 ### Subdaily Training Data
@@ -11,7 +16,7 @@ Examples:
 
 ### Extract Training Data and Merge All Years Into 1 File
 
-```{r}
+```{r, eval=FALSE}
 library(PEcAn.data.atmosphere)
 library(PEcAn.DB)
 
@@ -41,7 +46,7 @@ nc.merge(outfolder = file.path(outfolder, "training_data"), in.path = file.path(
 ### Generate Linear Regression Models From Training Data
 
 Note: This requires ~ 120 GB of space if using the entire training dataset
-```{r}
+```{r, eval=FALSE}
 in.prefix      <- "US-NR1"
 dat.train.file <- "~/Example/training_data/FLX_US-NR1_FLUXNET2015_SUBSET_HH_1998-2014_1-3_dat.train.nc"
 n.beta         <- 10 # Number of betas for the linear regression model to create, we'll choose 10 for time's sake
@@ -58,7 +63,7 @@ Examples:
 
 ### Extract Data We Want To Downscale
 
-```{r}
+```{r, eval=FALSE}
 start_date      <- "2020-01-01"
 end_date        <- "2020-12-31"
 site_id         <- 772
@@ -71,7 +76,7 @@ download.MACA(outfolder, start_date, end_date, site_id, model, scenario, ensembl
 
 ### Predict Subdaily Data Using Statistics From Training Data
 
-```{r}
+```{r, eval=FALSE}
 in.path        <- "~/Example/MACA_site_0-772"
 in.prefix      <- "MACA.BNU-ESM.rcp85.r1i1p1" # this is the data we are going to downscale
 lm.models.base <- "~/Example/lm_model_output" # where we stored the lm models

From 2d248d999971584f37b654802f4aa2691ab9c475 Mon Sep 17 00:00:00 2001
From: Sambhav Dixit <94298612+sambhavnoobcoder@users.noreply.github.com>
Date: Mon, 29 Jul 2024 04:25:24 +0530
Subject: [PATCH 087/155] Reverting pecan_package_dependencies.csv to original

Based on my earlier observation, I tried restoring NAMESPACE to its original format to debug the failures, but 11 actions still fail. Reverting this file should, in theory at least, let 11 actions pass, since the earlier run with the old NAMESPACE gave 11 passes and 2 failures. If this fails:
- it won't be clear to me what the cause of the error is
- it indicates some change between the initial run and this one, which needs to be reverted.
- even if no change is observed, the difference between the runs can help find the solution.
---
 docker/depends/pecan_package_dependencies.csv | 1 -
 1 file changed, 1 deletion(-)

diff --git a/docker/depends/pecan_package_dependencies.csv b/docker/depends/pecan_package_dependencies.csv
index 0d677a5a02c..f8894581945 100644
--- a/docker/depends/pecan_package_dependencies.csv
+++ b/docker/depends/pecan_package_dependencies.csv
@@ -122,7 +122,6 @@
 "jsonlite","*","models/stics","Imports",FALSE
 "jsonlite","*","modules/data.atmosphere","Imports",FALSE
 "jsonlite","*","modules/data.remote","Suggests",FALSE
-"keras3","*","modules/assim.sequential","Suggests",FALSE
 "knitr",">= 1.42","base/db","Suggests",FALSE
 "knitr",">= 1.42","base/qaqc","Suggests",FALSE
 "knitr",">= 1.42","modules/allometry","Suggests",FALSE

From bf66a239ad4c03516859a8f5f1eaf047df5d3f1f Mon Sep 17 00:00:00 2001
From: Chris Black <chris@ckblack.org>
Date: Sun, 28 Jul 2024 20:42:43 -0700
Subject: [PATCH 088/155] Update modules/allometry/R/AllomAve.R

---
 modules/allometry/R/AllomAve.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/allometry/R/AllomAve.R b/modules/allometry/R/AllomAve.R
index f6aba498caf..af297be4d7c 100644
--- a/modules/allometry/R/AllomAve.R
+++ b/modules/allometry/R/AllomAve.R
@@ -9,7 +9,7 @@
 
 #' AllomAve
 #'
-#' Allometery wrapper function that handles loading and subsetting the data,
+#' Allometry wrapper function that handles loading and subsetting the data,
 #'  fitting the Bayesian models, and generating diagnostic figures. Set up to loop over
 #'   multiple PFTs and components. 
 #'   Writes raw MCMC and PDF of diagnositcs to file and returns table of summary stats.

From 15b7e56736c2fb17c5958f6ae1009907e4c33e32 Mon Sep 17 00:00:00 2001
From: Chris Black <chris@ckblack.org>
Date: Sun, 28 Jul 2024 20:46:20 -0700
Subject: [PATCH 089/155] Update modules/allometry/man/AllomAve.Rd

---
 modules/allometry/man/AllomAve.Rd | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/allometry/man/AllomAve.Rd b/modules/allometry/man/AllomAve.Rd
index 8daa36d9453..7d9fc911bab 100644
--- a/modules/allometry/man/AllomAve.Rd
+++ b/modules/allometry/man/AllomAve.Rd
@@ -48,7 +48,7 @@ Default is stem biomass (6). See data(allom.components)}
 nested list of parameter summary statistics
 }
 \description{
-Allometery wrapper function that handles loading and subsetting the data,
+Allometry wrapper function that handles loading and subsetting the data,
  fitting the Bayesian models, and generating diagnostic figures. Set up to loop over
   multiple PFTs and components. 
   Writes raw MCMC and PDF of diagnositcs to file and returns table of summary stats.

From 8128ae18d363e51d1071f776a8d76ec552334b9f Mon Sep 17 00:00:00 2001
From: Chris Black <chris@ckblack.org>
Date: Sun, 28 Jul 2024 21:00:21 -0700
Subject: [PATCH 090/155] deps

---
 docker/depends/pecan_package_dependencies.csv | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docker/depends/pecan_package_dependencies.csv b/docker/depends/pecan_package_dependencies.csv
index 54bde494f59..6bb9f38974b 100644
--- a/docker/depends/pecan_package_dependencies.csv
+++ b/docker/depends/pecan_package_dependencies.csv
@@ -123,6 +123,7 @@
 "jsonlite","*","modules/data.atmosphere","Imports",FALSE
 "jsonlite","*","modules/data.remote","Suggests",FALSE
 "knitr","*","base/visualization","Suggests",FALSE
+"knitr","*","modules/data.atmosphere","Suggests",FALSE
 "knitr",">= 1.42","base/db","Suggests",FALSE
 "knitr",">= 1.42","base/qaqc","Suggests",FALSE
 "knitr",">= 1.42","modules/allometry","Suggests",FALSE

From 222bd80dfc526bec9f688b8defbbbe197e67d88a Mon Sep 17 00:00:00 2001
From: Chris Black <chris@ckblack.org>
Date: Mon, 29 Jul 2024 02:26:01 -0700
Subject: [PATCH 091/155] knitr vignettes need rmarkdown

---
 .github/workflows/ci-weekly.yml |  3 +++
 .github/workflows/depends.yml   | 10 +++++++---
 base/visualization/DESCRIPTION  |  1 +
 3 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/ci-weekly.yml b/.github/workflows/ci-weekly.yml
index 18f16ff8bd5..36bdebe6950 100644
--- a/.github/workflows/ci-weekly.yml
+++ b/.github/workflows/ci-weekly.yml
@@ -13,6 +13,7 @@ jobs:
       fail-fast: false
       matrix:
         R:
+          - "4.3"
           - "devel"
     uses: ./.github/workflows/test.yml
     with:
@@ -24,6 +25,7 @@ jobs:
       fail-fast: false
       matrix:
         R:
+          - "4.3"
           - "devel"
     uses: ./.github/workflows/check.yml
     with:
@@ -36,6 +38,7 @@ jobs:
       fail-fast: false
       matrix:
         R:
+          - "4.3"
           - "devel"
     uses: ./.github/workflows/sipnet.yml
     with:
diff --git a/.github/workflows/depends.yml b/.github/workflows/depends.yml
index 7b5bde58c89..35070426540 100644
--- a/.github/workflows/depends.yml
+++ b/.github/workflows/depends.yml
@@ -31,6 +31,7 @@ jobs:
           - "4.1"
           - "4.2"
           - "4.3"
+          - "4.4"
           - "devel"
 
     steps:
@@ -49,11 +50,14 @@ jobs:
 
       # calculate some variables that are used later
       - name: github branch
-        # build Rdevel only on Mondays, others every day (but not twice on Mondays)
+        # build weekly-tested versions only on Mondays, others every day
+        # (but not twice on Mondays)
         if: |
           github.event_name == 'workflow_dispatch' ||
-          (matrix.R != 'devel' && github.event.schedule == '0 0 * * *') ||
-          (matrix.R == 'devel' && github.event.schedule == '30 1 * * 1')
+          (contains(fromJSON('["4.1", "4.2", "4.4"]'), matrix.R)
+            && github.event.schedule == '0 0 * * *') ||
+          (contains(fromJSON('["4.3", "devel"]'), matrix.R)
+            && github.event.schedule == '30 1 * * 1')
         run: |
           BRANCH=${GITHUB_REF##*/}
           echo "GITHUB_BRANCH=${BRANCH}" >> $GITHUB_ENV
diff --git a/base/visualization/DESCRIPTION b/base/visualization/DESCRIPTION
index c7f735f435f..136d5a9dbbe 100644
--- a/base/visualization/DESCRIPTION
+++ b/base/visualization/DESCRIPTION
@@ -42,6 +42,7 @@ Suggests:
     mockery,
     png,
     raster,
+    rmarkdown,
     sp,
     testthat (>= 1.0.2),
     withr

From c9fdd7bbf54e8b24d5b84d81160a144742441cb5 Mon Sep 17 00:00:00 2001
From: Sambhav Dixit <94298612+sambhavnoobcoder@users.noreply.github.com>
Date: Mon, 29 Jul 2024 15:37:24 +0530
Subject: [PATCH 092/155] Update DESCRIPTION removing keras3

Revert to the state before the keras3 update. If this doesn't work, I'll add it to Imports instead of Suggests, look for improvements, and then follow up with step-by-step changes to the NAMESPACE files and dependencies as well.
---
 modules/assim.sequential/DESCRIPTION | 1 -
 1 file changed, 1 deletion(-)

diff --git a/modules/assim.sequential/DESCRIPTION b/modules/assim.sequential/DESCRIPTION
index 9a3857ee803..8f8ed49080f 100644
--- a/modules/assim.sequential/DESCRIPTION
+++ b/modules/assim.sequential/DESCRIPTION
@@ -48,7 +48,6 @@ Suggests:
     plotrix,
     plyr (>= 1.8.4),
     randomForest,
-    keras3,
     raster,
     readr,
     reshape2 (>= 1.4.2),

From 828f312d2e6d60d56af16fee92ee338365fa6386 Mon Sep 17 00:00:00 2001
From: Chris Black <chris@ckblack.org>
Date: Mon, 29 Jul 2024 03:06:50 -0700
Subject: [PATCH 093/155] depends

---
 docker/depends/pecan_package_dependencies.csv | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docker/depends/pecan_package_dependencies.csv b/docker/depends/pecan_package_dependencies.csv
index 6bb9f38974b..c4ff7caaa45 100644
--- a/docker/depends/pecan_package_dependencies.csv
+++ b/docker/depends/pecan_package_dependencies.csv
@@ -483,6 +483,7 @@
 "rlang","*","modules/uncertainty","Imports",FALSE
 "rlang",">= 0.2.0","modules/data.atmosphere","Imports",FALSE
 "rlist","*","modules/assim.sequential","Suggests",FALSE
+"rmarkdown","*","base/visualization","Suggests",FALSE
 "rmarkdown",">= 2.19","base/db","Suggests",FALSE
 "rmarkdown",">= 2.19","base/qaqc","Suggests",FALSE
 "rmarkdown",">= 2.19","modules/allometry","Suggests",FALSE

From 172bf55336b35303dbf1ffe267073a4e2ec8f136 Mon Sep 17 00:00:00 2001
From: Sambhav Dixit <94298612+sambhavnoobcoder@users.noreply.github.com>
Date: Mon, 29 Jul 2024 17:31:25 +0530
Subject: [PATCH 094/155] downgraded roxygen version to 7.3.1

Brought the roxygen version back in line with the version PEcAn supports, hoping this gets the bulk of the tests to pass.
---
 docker/depends/pecan_package_dependencies.csv | 84 +++++++++----------
 1 file changed, 42 insertions(+), 42 deletions(-)

diff --git a/docker/depends/pecan_package_dependencies.csv b/docker/depends/pecan_package_dependencies.csv
index f8894581945..e2b5182d97f 100644
--- a/docker/depends/pecan_package_dependencies.csv
+++ b/docker/depends/pecan_package_dependencies.csv
@@ -487,48 +487,48 @@
 "rmarkdown",">= 2.19","modules/assim.batch","Suggests",FALSE
 "rmarkdown",">= 2.19","modules/meta.analysis","Suggests",FALSE
 "rmarkdown",">= 2.19","modules/photosynthesis","Suggests",FALSE
-"roxygen2","== 7.3.2","base/all","Roxygen",FALSE
-"roxygen2","== 7.3.2","base/db","Roxygen",FALSE
-"roxygen2","== 7.3.2","base/logger","Roxygen",FALSE
-"roxygen2","== 7.3.2","base/qaqc","Roxygen",FALSE
-"roxygen2","== 7.3.2","base/remote","Roxygen",FALSE
-"roxygen2","== 7.3.2","base/settings","Roxygen",FALSE
-"roxygen2","== 7.3.2","base/utils","Roxygen",FALSE
-"roxygen2","== 7.3.2","base/visualization","Roxygen",FALSE
-"roxygen2","== 7.3.2","base/workflow","Roxygen",FALSE
-"roxygen2","== 7.3.2","models/basgra","Roxygen",FALSE
-"roxygen2","== 7.3.2","models/biocro","Roxygen",FALSE
-"roxygen2","== 7.3.2","models/cable","Roxygen",FALSE
-"roxygen2","== 7.3.2","models/clm45","Roxygen",FALSE
-"roxygen2","== 7.3.2","models/dalec","Roxygen",FALSE
-"roxygen2","== 7.3.2","models/dvmdostem","Roxygen",FALSE
-"roxygen2","== 7.3.2","models/ed","Roxygen",FALSE
-"roxygen2","== 7.3.2","models/fates","Roxygen",FALSE
-"roxygen2","== 7.3.2","models/gday","Roxygen",FALSE
-"roxygen2","== 7.3.2","models/jules","Roxygen",FALSE
-"roxygen2","== 7.3.2","models/ldndc","Roxygen",FALSE
-"roxygen2","== 7.3.2","models/linkages","Roxygen",FALSE
-"roxygen2","== 7.3.2","models/lpjguess","Roxygen",FALSE
-"roxygen2","== 7.3.2","models/maat","Roxygen",FALSE
-"roxygen2","== 7.3.2","models/maespa","Roxygen",FALSE
-"roxygen2","== 7.3.2","models/preles","Roxygen",FALSE
-"roxygen2","== 7.3.2","models/sibcasa","Roxygen",FALSE
-"roxygen2","== 7.3.2","models/sipnet","Roxygen",FALSE
-"roxygen2","== 7.3.2","models/stics","Roxygen",FALSE
-"roxygen2","== 7.3.2","models/template","Roxygen",FALSE
-"roxygen2","== 7.3.2","modules/allometry","Roxygen",FALSE
-"roxygen2","== 7.3.2","modules/assim.batch","Roxygen",FALSE
-"roxygen2","== 7.3.2","modules/assim.sequential","Roxygen",FALSE
-"roxygen2","== 7.3.2","modules/benchmark","Roxygen",FALSE
-"roxygen2","== 7.3.2","modules/data.atmosphere","Roxygen",FALSE
-"roxygen2","== 7.3.2","modules/data.land","Roxygen",FALSE
-"roxygen2","== 7.3.2","modules/data.remote","Roxygen",FALSE
-"roxygen2","== 7.3.2","modules/emulator","Roxygen",FALSE
-"roxygen2","== 7.3.2","modules/meta.analysis","Roxygen",FALSE
-"roxygen2","== 7.3.2","modules/photosynthesis","Roxygen",FALSE
-"roxygen2","== 7.3.2","modules/priors","Roxygen",FALSE
-"roxygen2","== 7.3.2","modules/rtm","Roxygen",FALSE
-"roxygen2","== 7.3.2","modules/uncertainty","Roxygen",FALSE
+"roxygen2","== 7.3.1","base/all","Roxygen",FALSE
+"roxygen2","== 7.3.1","base/db","Roxygen",FALSE
+"roxygen2","== 7.3.1","base/logger","Roxygen",FALSE
+"roxygen2","== 7.3.1","base/qaqc","Roxygen",FALSE
+"roxygen2","== 7.3.1","base/remote","Roxygen",FALSE
+"roxygen2","== 7.3.1","base/settings","Roxygen",FALSE
+"roxygen2","== 7.3.1","base/utils","Roxygen",FALSE
+"roxygen2","== 7.3.1","base/visualization","Roxygen",FALSE
+"roxygen2","== 7.3.1","base/workflow","Roxygen",FALSE
+"roxygen2","== 7.3.1","models/basgra","Roxygen",FALSE
+"roxygen2","== 7.3.1","models/biocro","Roxygen",FALSE
+"roxygen2","== 7.3.1","models/cable","Roxygen",FALSE
+"roxygen2","== 7.3.1","models/clm45","Roxygen",FALSE
+"roxygen2","== 7.3.1","models/dalec","Roxygen",FALSE
+"roxygen2","== 7.3.1","models/dvmdostem","Roxygen",FALSE
+"roxygen2","== 7.3.1","models/ed","Roxygen",FALSE
+"roxygen2","== 7.3.1","models/fates","Roxygen",FALSE
+"roxygen2","== 7.3.1","models/gday","Roxygen",FALSE
+"roxygen2","== 7.3.1","models/jules","Roxygen",FALSE
+"roxygen2","== 7.3.1","models/ldndc","Roxygen",FALSE
+"roxygen2","== 7.3.1","models/linkages","Roxygen",FALSE
+"roxygen2","== 7.3.1","models/lpjguess","Roxygen",FALSE
+"roxygen2","== 7.3.1","models/maat","Roxygen",FALSE
+"roxygen2","== 7.3.1","models/maespa","Roxygen",FALSE
+"roxygen2","== 7.3.1","models/preles","Roxygen",FALSE
+"roxygen2","== 7.3.1","models/sibcasa","Roxygen",FALSE
+"roxygen2","== 7.3.1","models/sipnet","Roxygen",FALSE
+"roxygen2","== 7.3.1","models/stics","Roxygen",FALSE
+"roxygen2","== 7.3.1","models/template","Roxygen",FALSE
+"roxygen2","== 7.3.1","modules/allometry","Roxygen",FALSE
+"roxygen2","== 7.3.1","modules/assim.batch","Roxygen",FALSE
+"roxygen2","== 7.3.1","modules/assim.sequential","Roxygen",FALSE
+"roxygen2","== 7.3.1","modules/benchmark","Roxygen",FALSE
+"roxygen2","== 7.3.1","modules/data.atmosphere","Roxygen",FALSE
+"roxygen2","== 7.3.1","modules/data.land","Roxygen",FALSE
+"roxygen2","== 7.3.1","modules/data.remote","Roxygen",FALSE
+"roxygen2","== 7.3.1","modules/emulator","Roxygen",FALSE
+"roxygen2","== 7.3.1","modules/meta.analysis","Roxygen",FALSE
+"roxygen2","== 7.3.1","modules/photosynthesis","Roxygen",FALSE
+"roxygen2","== 7.3.1","modules/priors","Roxygen",FALSE
+"roxygen2","== 7.3.1","modules/rtm","Roxygen",FALSE
+"roxygen2","== 7.3.1","modules/uncertainty","Roxygen",FALSE
 "RPostgres","*","base/db","Suggests",FALSE
 "RPostgreSQL","*","base/db","Suggests",FALSE
 "RPostgreSQL","*","models/biocro","Suggests",FALSE

From 5e43b35f774730c3cb0e5e651f076c0fabd4e127 Mon Sep 17 00:00:00 2001
From: Sambhav Dixit <94298612+sambhavnoobcoder@users.noreply.github.com>
Date: Mon, 29 Jul 2024 18:11:40 +0530
Subject: [PATCH 095/155] Revert to last successful version

revert to last version of dependencies of the PR on the project
---
 docker/depends/pecan_package_dependencies.csv | 84 +++++++++----------
 1 file changed, 42 insertions(+), 42 deletions(-)

diff --git a/docker/depends/pecan_package_dependencies.csv b/docker/depends/pecan_package_dependencies.csv
index e2b5182d97f..f8894581945 100644
--- a/docker/depends/pecan_package_dependencies.csv
+++ b/docker/depends/pecan_package_dependencies.csv
@@ -487,48 +487,48 @@
 "rmarkdown",">= 2.19","modules/assim.batch","Suggests",FALSE
 "rmarkdown",">= 2.19","modules/meta.analysis","Suggests",FALSE
 "rmarkdown",">= 2.19","modules/photosynthesis","Suggests",FALSE
-"roxygen2","== 7.3.1","base/all","Roxygen",FALSE
-"roxygen2","== 7.3.1","base/db","Roxygen",FALSE
-"roxygen2","== 7.3.1","base/logger","Roxygen",FALSE
-"roxygen2","== 7.3.1","base/qaqc","Roxygen",FALSE
-"roxygen2","== 7.3.1","base/remote","Roxygen",FALSE
-"roxygen2","== 7.3.1","base/settings","Roxygen",FALSE
-"roxygen2","== 7.3.1","base/utils","Roxygen",FALSE
-"roxygen2","== 7.3.1","base/visualization","Roxygen",FALSE
-"roxygen2","== 7.3.1","base/workflow","Roxygen",FALSE
-"roxygen2","== 7.3.1","models/basgra","Roxygen",FALSE
-"roxygen2","== 7.3.1","models/biocro","Roxygen",FALSE
-"roxygen2","== 7.3.1","models/cable","Roxygen",FALSE
-"roxygen2","== 7.3.1","models/clm45","Roxygen",FALSE
-"roxygen2","== 7.3.1","models/dalec","Roxygen",FALSE
-"roxygen2","== 7.3.1","models/dvmdostem","Roxygen",FALSE
-"roxygen2","== 7.3.1","models/ed","Roxygen",FALSE
-"roxygen2","== 7.3.1","models/fates","Roxygen",FALSE
-"roxygen2","== 7.3.1","models/gday","Roxygen",FALSE
-"roxygen2","== 7.3.1","models/jules","Roxygen",FALSE
-"roxygen2","== 7.3.1","models/ldndc","Roxygen",FALSE
-"roxygen2","== 7.3.1","models/linkages","Roxygen",FALSE
-"roxygen2","== 7.3.1","models/lpjguess","Roxygen",FALSE
-"roxygen2","== 7.3.1","models/maat","Roxygen",FALSE
-"roxygen2","== 7.3.1","models/maespa","Roxygen",FALSE
-"roxygen2","== 7.3.1","models/preles","Roxygen",FALSE
-"roxygen2","== 7.3.1","models/sibcasa","Roxygen",FALSE
-"roxygen2","== 7.3.1","models/sipnet","Roxygen",FALSE
-"roxygen2","== 7.3.1","models/stics","Roxygen",FALSE
-"roxygen2","== 7.3.1","models/template","Roxygen",FALSE
-"roxygen2","== 7.3.1","modules/allometry","Roxygen",FALSE
-"roxygen2","== 7.3.1","modules/assim.batch","Roxygen",FALSE
-"roxygen2","== 7.3.1","modules/assim.sequential","Roxygen",FALSE
-"roxygen2","== 7.3.1","modules/benchmark","Roxygen",FALSE
-"roxygen2","== 7.3.1","modules/data.atmosphere","Roxygen",FALSE
-"roxygen2","== 7.3.1","modules/data.land","Roxygen",FALSE
-"roxygen2","== 7.3.1","modules/data.remote","Roxygen",FALSE
-"roxygen2","== 7.3.1","modules/emulator","Roxygen",FALSE
-"roxygen2","== 7.3.1","modules/meta.analysis","Roxygen",FALSE
-"roxygen2","== 7.3.1","modules/photosynthesis","Roxygen",FALSE
-"roxygen2","== 7.3.1","modules/priors","Roxygen",FALSE
-"roxygen2","== 7.3.1","modules/rtm","Roxygen",FALSE
-"roxygen2","== 7.3.1","modules/uncertainty","Roxygen",FALSE
+"roxygen2","== 7.3.2","base/all","Roxygen",FALSE
+"roxygen2","== 7.3.2","base/db","Roxygen",FALSE
+"roxygen2","== 7.3.2","base/logger","Roxygen",FALSE
+"roxygen2","== 7.3.2","base/qaqc","Roxygen",FALSE
+"roxygen2","== 7.3.2","base/remote","Roxygen",FALSE
+"roxygen2","== 7.3.2","base/settings","Roxygen",FALSE
+"roxygen2","== 7.3.2","base/utils","Roxygen",FALSE
+"roxygen2","== 7.3.2","base/visualization","Roxygen",FALSE
+"roxygen2","== 7.3.2","base/workflow","Roxygen",FALSE
+"roxygen2","== 7.3.2","models/basgra","Roxygen",FALSE
+"roxygen2","== 7.3.2","models/biocro","Roxygen",FALSE
+"roxygen2","== 7.3.2","models/cable","Roxygen",FALSE
+"roxygen2","== 7.3.2","models/clm45","Roxygen",FALSE
+"roxygen2","== 7.3.2","models/dalec","Roxygen",FALSE
+"roxygen2","== 7.3.2","models/dvmdostem","Roxygen",FALSE
+"roxygen2","== 7.3.2","models/ed","Roxygen",FALSE
+"roxygen2","== 7.3.2","models/fates","Roxygen",FALSE
+"roxygen2","== 7.3.2","models/gday","Roxygen",FALSE
+"roxygen2","== 7.3.2","models/jules","Roxygen",FALSE
+"roxygen2","== 7.3.2","models/ldndc","Roxygen",FALSE
+"roxygen2","== 7.3.2","models/linkages","Roxygen",FALSE
+"roxygen2","== 7.3.2","models/lpjguess","Roxygen",FALSE
+"roxygen2","== 7.3.2","models/maat","Roxygen",FALSE
+"roxygen2","== 7.3.2","models/maespa","Roxygen",FALSE
+"roxygen2","== 7.3.2","models/preles","Roxygen",FALSE
+"roxygen2","== 7.3.2","models/sibcasa","Roxygen",FALSE
+"roxygen2","== 7.3.2","models/sipnet","Roxygen",FALSE
+"roxygen2","== 7.3.2","models/stics","Roxygen",FALSE
+"roxygen2","== 7.3.2","models/template","Roxygen",FALSE
+"roxygen2","== 7.3.2","modules/allometry","Roxygen",FALSE
+"roxygen2","== 7.3.2","modules/assim.batch","Roxygen",FALSE
+"roxygen2","== 7.3.2","modules/assim.sequential","Roxygen",FALSE
+"roxygen2","== 7.3.2","modules/benchmark","Roxygen",FALSE
+"roxygen2","== 7.3.2","modules/data.atmosphere","Roxygen",FALSE
+"roxygen2","== 7.3.2","modules/data.land","Roxygen",FALSE
+"roxygen2","== 7.3.2","modules/data.remote","Roxygen",FALSE
+"roxygen2","== 7.3.2","modules/emulator","Roxygen",FALSE
+"roxygen2","== 7.3.2","modules/meta.analysis","Roxygen",FALSE
+"roxygen2","== 7.3.2","modules/photosynthesis","Roxygen",FALSE
+"roxygen2","== 7.3.2","modules/priors","Roxygen",FALSE
+"roxygen2","== 7.3.2","modules/rtm","Roxygen",FALSE
+"roxygen2","== 7.3.2","modules/uncertainty","Roxygen",FALSE
 "RPostgres","*","base/db","Suggests",FALSE
 "RPostgreSQL","*","base/db","Suggests",FALSE
 "RPostgreSQL","*","models/biocro","Suggests",FALSE

From 85ed698cdc196b9aac130174c6ab8bea3716e64e Mon Sep 17 00:00:00 2001
From: Harunobu Ishii <harunobuishii547@gmail.com>
Date: Mon, 29 Jul 2024 09:22:43 -0400
Subject: [PATCH 096/155] Suggested change in  namespace and file input style
 modified

---
 .../assim.sequential/R/downscale_function_hrly.R | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/modules/assim.sequential/R/downscale_function_hrly.R b/modules/assim.sequential/R/downscale_function_hrly.R
index 758ea6af7fb..3950d46830a 100644
--- a/modules/assim.sequential/R/downscale_function_hrly.R
+++ b/modules/assim.sequential/R/downscale_function_hrly.R
@@ -1,20 +1,18 @@
-#' @title North America Downscale Function
-#' @name NA_downscale_hrly
+#' SDA　Downscale Function for Hourly Data
+#' 
+#' This function uses the randomForest model to downscale forecast data (hourly) to unmodeled locations using covariates and site locations
+#' 
 #' @author Harunobu Ishii
-#'
-#' @param nc_data  In quotes, file path for .nc containing ensemble data.
+#' @param nc_file  In quotes, file path for .nc containing ensemble data.
 #' @param coords In quotes, file path for .csv file containing the site coordinates, columns named "lon" and "lat".
 #' @param date In quotes, format is yyyy-mm-dd hh:mm:ss EST. Restricted to time within file supplied to 'data' (hours since 1986-01-01T00:00).
 #' @param covariates SpatRaster stack, used as predictors in randomForest. Layers within stack should be named. Recommended that this stack be generated using 'covariates' instructions in assim.sequential/inst folder
-#' @details This function will downscale forecast data (hourly) to unmodeled locations using covariates and site locations
-#'
-#' @description This function uses the randomForest model.
 #' @return It returns the `downscale_output` list containing lists for the training and testing data sets, models, and predicted maps for each ensemble member.
 #' @import ncdf4
 #' @export
 
-NA_downscale_hrly <- function(nc_data, coords, date, covariates){
-  
+SDA_downscale_hrly <- function(nc_file, coords, date, covariates){
+  nc_data <- nc_open(nc_file)
   # Read the input data and site coordinates
   input_data <- ncvar_get(nc_data, "NEE")
   weights_rrel <- ncvar_get(nc_data, "weights_rrel")

From c6bb0d022fb9fe68d3d77c20074154823c3c6b3c Mon Sep 17 00:00:00 2001
From: Harunobu Ishii <harunobuishii547@gmail.com>
Date: Mon, 29 Jul 2024 10:10:33 -0400
Subject: [PATCH 097/155] Time units uses lubridate

---
 .../assim.sequential/R/downscale_function_hrly.R    | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/modules/assim.sequential/R/downscale_function_hrly.R b/modules/assim.sequential/R/downscale_function_hrly.R
index 3950d46830a..d05c31075ea 100644
--- a/modules/assim.sequential/R/downscale_function_hrly.R
+++ b/modules/assim.sequential/R/downscale_function_hrly.R
@@ -9,6 +9,7 @@
 #' @param covariates SpatRaster stack, used as predictors in randomForest. Layers within stack should be named. Recommended that this stack be generated using 'covariates' instructions in assim.sequential/inst folder
 #' @return It returns the `downscale_output` list containing lists for the training and testing data sets, models, and predicted maps for each ensemble member.
 #' @import ncdf4
+#' @import lubridate
 #' @export
 
 SDA_downscale_hrly <- function(nc_file, coords, date, covariates){
@@ -20,8 +21,16 @@ SDA_downscale_hrly <- function(nc_file, coords, date, covariates){
   # Timereadable
   time <- nc_data$dim$time$vals
   time_units <- nc_data$dim$time$units
-  time_origin <- as.POSIXct(substr(time_units, 12, 31), format="%Y-%m-%dT%H:%M")
-  time_readable <- time_origin + time * 3600  # Convert hours to seconds
+  time_origin_str <- substr(time_units, 12, 31)
+  time_origin <- ymd_hm(time_origin_str, tz="EST")
+  # Check if time units are in hours and convert appropriately
+  if (grepl("hours", time_units)) {
+    time_readable <- time_origin + dhours(time)
+  } else if (grepl("seconds", time_units)) {
+    time_readable <- time_origin + dseconds(time)
+  } else {
+    stop("Unsupported time units")
+  }
   
   # Extract predictors from covariates raster using site coordinates
   site_coordinates <- terra::vect(readr::read_csv(coords), geom=c("lon", "lat"), crs="EPSG:4326")

From fa209b4942d93ceaa43915346a6f0c732f3c2e23 Mon Sep 17 00:00:00 2001
From: Sambhav Dixit <94298612+sambhavnoobcoder@users.noreply.github.com>
Date: Mon, 29 Jul 2024 21:16:32 +0530
Subject: [PATCH 098/155] added keras3 to the suggests

Added keras3 to Suggests, as described in discussion; moving back to the base suggestions, implemented one by one.
---
 modules/assim.sequential/DESCRIPTION | 1 +
 1 file changed, 1 insertion(+)

diff --git a/modules/assim.sequential/DESCRIPTION b/modules/assim.sequential/DESCRIPTION
index 8f8ed49080f..9a3857ee803 100644
--- a/modules/assim.sequential/DESCRIPTION
+++ b/modules/assim.sequential/DESCRIPTION
@@ -48,6 +48,7 @@ Suggests:
     plotrix,
     plyr (>= 1.8.4),
     randomForest,
+    keras3,
     raster,
     readr,
     reshape2 (>= 1.4.2),

From db889819055dfca6a84592810a1dc0952acaec44 Mon Sep 17 00:00:00 2001
From: Chris Black <chris@ckblack.org>
Date: Mon, 29 Jul 2024 09:30:28 -0700
Subject: [PATCH 099/155] fix Roxygen breakage from sneaky curly apostrophe

---
 modules/assim.sequential/R/sda.enkf.R         |  4 +-
 .../assim.sequential/man/sda.enkf.original.Rd | 43 +++++++++++++++++++
 2 files changed, 45 insertions(+), 2 deletions(-)
 create mode 100644 modules/assim.sequential/man/sda.enkf.original.Rd

diff --git a/modules/assim.sequential/R/sda.enkf.R b/modules/assim.sequential/R/sda.enkf.R
index 3dff5d07c79..20f1674d034 100644
--- a/modules/assim.sequential/R/sda.enkf.R
+++ b/modules/assim.sequential/R/sda.enkf.R
@@ -1,6 +1,6 @@
 ##' State Variable Data Assimilation: Ensemble Kalman Filter
-##’
-##’ Restart mode:  Basic idea is that during a restart (primary case envisioned as an iterative forecast),
+##'
+##' Restart mode:  Basic idea is that during a restart (primary case envisioned as an iterative forecast),
 ##'  a new workflow folder is created and the previous forecast for the start_time is copied over.
 ##' During restart the initial run before the loop is skipped, with the info being populated from the previous run.
 ##' The function then dives right into the first Analysis, then continues on like normal.
diff --git a/modules/assim.sequential/man/sda.enkf.original.Rd b/modules/assim.sequential/man/sda.enkf.original.Rd
new file mode 100644
index 00000000000..6b59849a8a3
--- /dev/null
+++ b/modules/assim.sequential/man/sda.enkf.original.Rd
@@ -0,0 +1,43 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/sda.enkf.R
+\name{sda.enkf.original}
+\alias{sda.enkf.original}
+\title{State Variable Data Assimilation: Ensemble Kalman Filter}
+\usage{
+sda.enkf.original(
+  settings,
+  obs.mean,
+  obs.cov,
+  IC = NULL,
+  Q = NULL,
+  adjustment = TRUE,
+  restart = NULL
+)
+}
+\arguments{
+\item{settings}{PEcAn settings object}
+
+\item{obs.mean}{list of observations of the means of state variable (time X nstate)}
+
+\item{obs.cov}{list of observations of covariance matrices of state variables (time X nstate X nstate)}
+
+\item{IC}{initial conditions}
+
+\item{Q}{process covariance matrix given if there is no data to estimate it}
+
+\item{adjustment}{flag for using ensemble adjustment filter or not}
+
+\item{restart}{Used for iterative updating previous forecasts. This is a list that includes ens.inputs, the list of inputs by ensemble member, params, the parameters, and old_outdir, the output directory from the previous workflow. These three things are needed to ensure that if a new workflow is started that ensemble members keep there run-specific met and params. See Details}
+}
+\value{
+NONE
+}
+\description{
+Restart mode:  Basic idea is that during a restart (primary case envisioned as an iterative forecast),
+ a new workflow folder is created and the previous forecast for the start_time is copied over.
+During restart the initial run before the loop is skipped, with the info being populated from the previous run.
+The function then dives right into the first Analysis, then continues on like normal.
+}
+\author{
+Michael Dietze and Ann Raiho \email{dietze@bu.edu}
+}

From 2481f31c70dc25e4fa4a6e66ea79e7f10aca9301 Mon Sep 17 00:00:00 2001
From: Chris Black <chris@ckblack.org>
Date: Mon, 29 Jul 2024 09:52:57 -0700
Subject: [PATCH 100/155] typo

---
 modules/allometry/R/allom.predict.R    | 2 +-
 modules/allometry/man/allom.predict.Rd | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/modules/allometry/R/allom.predict.R b/modules/allometry/R/allom.predict.R
index 5dc032caa9f..2423fc283db 100644
--- a/modules/allometry/R/allom.predict.R
+++ b/modules/allometry/R/allom.predict.R
@@ -25,7 +25,7 @@
 #' @param n Number of Monte Carlo samples. Defaults to the same number as in the MCMC object
 #' @param use  c('Bg','mu','best')
 #' @param interval c('none','confidence','prediction') default is prediction
-#' @param single.tree logical: Is this a DBH time series from one indidual tree?
+#' @param single.tree logical: Is this a DBH time series from one individual tree?
 #'  If TRUE, will use a fixed error for all draws.
 #'
 #' @return matrix of Monte Carlo predictions that has n rows and one column per DBH
diff --git a/modules/allometry/man/allom.predict.Rd b/modules/allometry/man/allom.predict.Rd
index 0c11064a542..031510f2504 100644
--- a/modules/allometry/man/allom.predict.Rd
+++ b/modules/allometry/man/allom.predict.Rd
@@ -36,7 +36,7 @@ Can be NULL if only one PFT/species exists, otherwise needs to the same length a
 
 \item{interval}{c('none','confidence','prediction') default is prediction}
 
-\item{single.tree}{logical: Is this a DBH time series from one indidual tree?
+\item{single.tree}{logical: Is this a DBH time series from one individual tree?
 If TRUE, will use a fixed error for all draws.}
 }
 \value{

From 900acee8f5f368be27620963718aef2d075c9b0b Mon Sep 17 00:00:00 2001
From: Harunobu Ishii <harunobuishii547@gmail.com>
Date: Mon, 29 Jul 2024 14:28:10 -0400
Subject: [PATCH 101/155] downscale func takes time series

---
 modules/assim.sequential/NAMESPACE            |   2 +-
 .../R/downscale_function_hrly.R               | 122 +++++++++---------
 ...ownscale_hrly.Rd => SDA_downscale_hrly.Rd} |  19 ++-
 3 files changed, 67 insertions(+), 76 deletions(-)
 rename modules/assim.sequential/man/{NA_downscale_hrly.Rd => SDA_downscale_hrly.Rd} (68%)

diff --git a/modules/assim.sequential/NAMESPACE b/modules/assim.sequential/NAMESPACE
index 1abf9dea8b9..5984425da4e 100644
--- a/modules/assim.sequential/NAMESPACE
+++ b/modules/assim.sequential/NAMESPACE
@@ -13,12 +13,12 @@ export(GEF.MultiSite)
 export(GEF.MultiSite.Nimble)
 export(GrabFillMatrix)
 export(Local.support)
-export(NA_downscale_hrly)
 export(Obs.data.prepare.MultiSite)
 export(Prep_OBS_SDA)
 export(Remote_Sync_launcher)
 export(SDA_OBS_Assembler)
 export(SDA_control)
+export(SDA_downscale_hrly)
 export(SDA_remote_launcher)
 export(SDA_timeseries_plot)
 export(adj.ens)
diff --git a/modules/assim.sequential/R/downscale_function_hrly.R b/modules/assim.sequential/R/downscale_function_hrly.R
index d05c31075ea..c89475e0c5f 100644
--- a/modules/assim.sequential/R/downscale_function_hrly.R
+++ b/modules/assim.sequential/R/downscale_function_hrly.R
@@ -12,18 +12,18 @@
 #' @import lubridate
 #' @export
 
-SDA_downscale_hrly <- function(nc_file, coords, date, covariates){
-  nc_data <- nc_open(nc_file)
+SDA_downscale_hrly <- function(nc_file, coords, yyyy, covariates){
+  
   # Read the input data and site coordinates
+  nc_data <- nc_open(nc_file)
   input_data <- ncvar_get(nc_data, "NEE")
-  weights_rrel <- ncvar_get(nc_data, "weights_rrel")
+  covariate_names <- names(covariates)
   
   # Timereadable
   time <- nc_data$dim$time$vals
   time_units <- nc_data$dim$time$units
   time_origin_str <- substr(time_units, 12, 31)
   time_origin <- ymd_hm(time_origin_str, tz="EST")
-  # Check if time units are in hours and convert appropriately
   if (grepl("hours", time_units)) {
     time_readable <- time_origin + dhours(time)
   } else if (grepl("seconds", time_units)) {
@@ -34,70 +34,64 @@ SDA_downscale_hrly <- function(nc_file, coords, date, covariates){
   
   # Extract predictors from covariates raster using site coordinates
   site_coordinates <- terra::vect(readr::read_csv(coords), geom=c("lon", "lat"), crs="EPSG:4326")
-  index <- which(time_readable == date)
-  data <- input_data[index, , ]
-  carbon_data <- as.data.frame(data)
   predictors <- as.data.frame(terra::extract(covariates, site_coordinates,ID = FALSE)) 
 
-  # Arrange relative weights of each ensemble member over time and space/site
-  curr_weights_rrel <- weights_rrel[, , index]
-  names(carbon_data) <- paste0("ensemble",seq(1:ncol(carbon_data)))
-  colnames(curr_weights_rrel) <- paste0("ensemble",seq(1:ncol(curr_weights_rrel)))
-  
-  # Combine each ensemble member with all predictors
-  ensembles <- list()
-  for (i in seq_along(carbon_data)) {
-    ensembles[[i]] <- cbind(carbon_data[[i]], predictors)
-  }
-  
-  # Rename the carbon_data column for each ensemble member
-  for (i in 1:length(ensembles)) {
-    ensembles[[i]] <- dplyr::rename(ensembles[[i]], "carbon_data" = "carbon_data[[i]]")
-  }
-  
-  # Split the observations in each data frame into two data frames based on the proportion of 3/4
-  ensembles <- lapply(ensembles, function(df) {
-    sample <- sample(1:nrow(df), size = round(0.75*nrow(df)))
-    train  <- df[sample, ]
-    test   <- df[-sample, ]
-    split_list <- list(train, test)
-    return(split_list)
-  })
-  
-  # Rename the training and testing data frames for each ensemble member
-  for (i in 1:length(ensembles)) {
-    # names(ensembles) <- paste0("ensemble",seq(1:length(ensembles)))
-    names(ensembles[[i]]) <- c("training", "testing")
-  }
-  
-  # Train a random forest model for each ensemble member using the training data
-  rf_output <- list()
-  for (i in 1:length(ensembles)) {
-    rf_output[[i]] <- randomForest::randomForest(ensembles[[i]][[1]][["carbon_data"]] ~ land_cover+tavg+prec+srad+vapr+nitrogen+phh2o+soc+sand,
-                                                 data = ensembles[[i]][[1]],
-                                                 ntree = 1000,
-                                                 na.action = stats::na.omit,
-                                                 keep.forest = T,
-                                                 importance = T)
-  }
-  
-  # Generate predictions (maps) for each ensemble member using the trained models
-  maps <- list(ncol(rf_output))
-  for (i in 1:length(rf_output)) {
-    maps[[i]] <- terra::predict(object = covariates,
-                                model = rf_output[[i]],na.rm = T)
-  }
+  downscale_output<- list()
   
-  # Organize the results into a single output list
-  downscale_output <- list(ensembles, rf_output, maps, curr_weights_rrel)
+  # Train & Test split
+  sample <- sample(1:nrow(predictors), size = round(0.75*nrow(predictors)))
   
-  # Rename each element of the output list with appropriate ensemble numbers
-  for (i in 1:(length(downscale_output)-1)) {
-    names(downscale_output[[i]]) <- paste0("ensemble",seq(1:length(downscale_output[[i]])))
+  # Predict for each time stamp of the year selected
+  time_indices <- which(year(time_readable) == yyyy)
+  for (index in time_indices) {
+    if(index == 37986){
+      break
+    }
+    data <- input_data[index, , ]
+    carbon_data <- as.data.frame(data)
+    names(carbon_data) <- paste0("ensemble",seq(1:ncol(carbon_data)))
+
+    # Combine carbon data and covariates/predictors and split into training/test
+    full_data <- cbind(carbon_data, predictors)
+    train_data <- full_data[sample, ]
+    test_data <- full_data[-sample, ]
+    
+    # Combine each ensemble member with all predictors
+    models <- list()
+    maps <- list()
+    predictions <- list()
+    ensembles <- list()
+    for (i in seq_along(carbon_data)) {
+      ensemble_col <- paste0("ensemble", i)
+      formula <- stats::as.formula(paste(ensemble_col, "~", paste(covariate_names, collapse = " + ")))
+      models[[i]] <- randomForest::randomForest(formula,
+                                                data = train_data,
+                                                ntree = 1000,
+                                                na.action = stats::na.omit,
+                                                keep.forest = TRUE,
+                                                importance = TRUE)
+      
+      maps[[i]] <- terra::predict(covariates, model = models[[i]], na.rm = TRUE)
+      predictions[[i]] <- stats::predict(models[[i]], test_data)
+    }
+
+    # Organize the results into a single output list
+    curr_downscaled <- list( data = list(training = train_data, testing = test_data),
+                             models = models,
+                             maps = maps,
+                             predictions = predictions
+                            )
+    
+    # Rename each element of the output list with appropriate ensemble numbers
+    for (i in 1:length(curr_downscaled$data)) {
+      names(curr_downscaled$data[[i]]) <- paste0("ensemble", seq(1:ncol(carbon_data)))
+    }
+    names(curr_downscaled$models) <- paste0("ensemble", seq(1:ncol(carbon_data)))
+    names(curr_downscaled$maps) <- paste0("ensemble", seq(1:ncol(carbon_data)))
+    names(curr_downscaled$predictions) <- paste0("ensemble", seq(1:ncol(carbon_data)))
+    
+    downscale_output[[as.character(time_readable[index])]]<-curr_downscaled
   }
-  
-  # Rename the main components of the output list
-  names(downscale_output) <- c("data", "models", "maps", "weights_rrel")
-  
+  nc_close(nc_data)
   return(downscale_output)
 }
diff --git a/modules/assim.sequential/man/NA_downscale_hrly.Rd b/modules/assim.sequential/man/SDA_downscale_hrly.Rd
similarity index 68%
rename from modules/assim.sequential/man/NA_downscale_hrly.Rd
rename to modules/assim.sequential/man/SDA_downscale_hrly.Rd
index 1a8984575c7..1b9b66212ed 100644
--- a/modules/assim.sequential/man/NA_downscale_hrly.Rd
+++ b/modules/assim.sequential/man/SDA_downscale_hrly.Rd
@@ -1,28 +1,25 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/downscale_function_hrly.R
-\name{NA_downscale_hrly}
-\alias{NA_downscale_hrly}
-\title{North America Downscale Function}
+\name{SDA_downscale_hrly}
+\alias{SDA_downscale_hrly}
+\title{SDA　Downscale Function for Hourly Data}
 \usage{
-NA_downscale_hrly(nc_data, coords, date, covariates)
+SDA_downscale_hrly(nc_file, coords, yyyy, covariates)
 }
 \arguments{
-\item{nc_data}{In quotes, file path for .nc containing ensemble data.}
+\item{nc_file}{In quotes, file path for .nc containing ensemble data.}
 
 \item{coords}{In quotes, file path for .csv file containing the site coordinates, columns named "lon" and "lat".}
 
-\item{date}{In quotes, format is yyyy-mm-dd hh:mm:ss EST. Restricted to time within file supplied to 'data' (hours since 1986-01-01T00:00).}
-
 \item{covariates}{SpatRaster stack, used as predictors in randomForest. Layers within stack should be named. Recommended that this stack be generated using 'covariates' instructions in assim.sequential/inst folder}
+
+\item{date}{In quotes, format is yyyy-mm-dd hh:mm:ss EST. Restricted to time within file supplied to 'data' (hours since 1986-01-01T00:00).}
 }
 \value{
 It returns the `downscale_output` list containing lists for the training and testing data sets, models, and predicted maps for each ensemble member.
 }
 \description{
-This function uses the randomForest model.
-}
-\details{
-This function will downscale forecast data (hourly) to unmodeled locations using covariates and site locations
+This function uses the randomForest model to downscale forecast data (hourly) to unmodeled locations using covariates and site locations
 }
 \author{
 Harunobu Ishii

From 8c4234ad7b4b496ad2d16bad8f4f7931e6a7ed7c Mon Sep 17 00:00:00 2001
From: Harunobu Ishii <harunobuishii547@gmail.com>
Date: Mon, 29 Jul 2024 14:29:32 -0400
Subject: [PATCH 102/155] downscale func takes time series

---
 modules/assim.sequential/R/downscale_function_hrly.R | 2 +-
 modules/assim.sequential/man/SDA_downscale_hrly.Rd   | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/modules/assim.sequential/R/downscale_function_hrly.R b/modules/assim.sequential/R/downscale_function_hrly.R
index c89475e0c5f..b76caac31f3 100644
--- a/modules/assim.sequential/R/downscale_function_hrly.R
+++ b/modules/assim.sequential/R/downscale_function_hrly.R
@@ -5,7 +5,7 @@
 #' @author Harunobu Ishii
 #' @param nc_file  In quotes, file path for .nc containing ensemble data.
 #' @param coords In quotes, file path for .csv file containing the site coordinates, columns named "lon" and "lat".
-#' @param date In quotes, format is yyyy-mm-dd hh:mm:ss EST. Restricted to time within file supplied to 'data' (hours since 1986-01-01T00:00).
+#' @param yyyy In string, format is yyyy(year of interest)
 #' @param covariates SpatRaster stack, used as predictors in randomForest. Layers within stack should be named. Recommended that this stack be generated using 'covariates' instructions in assim.sequential/inst folder
 #' @return It returns the `downscale_output` list containing lists for the training and testing data sets, models, and predicted maps for each ensemble member.
 #' @import ncdf4
diff --git a/modules/assim.sequential/man/SDA_downscale_hrly.Rd b/modules/assim.sequential/man/SDA_downscale_hrly.Rd
index 1b9b66212ed..6cfe7eba902 100644
--- a/modules/assim.sequential/man/SDA_downscale_hrly.Rd
+++ b/modules/assim.sequential/man/SDA_downscale_hrly.Rd
@@ -11,9 +11,9 @@ SDA_downscale_hrly(nc_file, coords, yyyy, covariates)
 
 \item{coords}{In quotes, file path for .csv file containing the site coordinates, columns named "lon" and "lat".}
 
-\item{covariates}{SpatRaster stack, used as predictors in randomForest. Layers within stack should be named. Recommended that this stack be generated using 'covariates' instructions in assim.sequential/inst folder}
+\item{yyyy}{In string, format is yyyy(year of interest)}
 
-\item{date}{In quotes, format is yyyy-mm-dd hh:mm:ss EST. Restricted to time within file supplied to 'data' (hours since 1986-01-01T00:00).}
+\item{covariates}{SpatRaster stack, used as predictors in randomForest. Layers within stack should be named. Recommended that this stack be generated using 'covariates' instructions in assim.sequential/inst folder}
 }
 \value{
 It returns the `downscale_output` list containing lists for the training and testing data sets, models, and predicted maps for each ensemble member.

From bfeb8e577033a038481a3d92d80ff50e848c7da5 Mon Sep 17 00:00:00 2001
From: Harunobu Ishii <harunobuishii547@gmail.com>
Date: Mon, 29 Jul 2024 14:32:48 -0400
Subject: [PATCH 103/155] Time Zone Checked

---
 .../assim.sequential/R/downscale_function_hrly.R  | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/modules/assim.sequential/R/downscale_function_hrly.R b/modules/assim.sequential/R/downscale_function_hrly.R
index b76caac31f3..01f18904f00 100644
--- a/modules/assim.sequential/R/downscale_function_hrly.R
+++ b/modules/assim.sequential/R/downscale_function_hrly.R
@@ -19,11 +19,22 @@ SDA_downscale_hrly <- function(nc_file, coords, yyyy, covariates){
   input_data <- ncvar_get(nc_data, "NEE")
   covariate_names <- names(covariates)
   
-  # Timereadable
+  
+  # Extract time and units
   time <- nc_data$dim$time$vals
   time_units <- nc_data$dim$time$units
   time_origin_str <- substr(time_units, 12, 31)
-  time_origin <- ymd_hm(time_origin_str, tz="EST")
+  
+  # Check if timezone is specified in the time units string
+  if (grepl("UTC|GMT", time_units)) {
+    time_origin <- ymd_hm(time_origin_str, tz = "UTC")
+  } else if (grepl("EST", time_units)) {
+    time_origin <- ymd_hm(time_origin_str, tz = "EST")
+  } else {
+    time_origin <- ymd_hm(time_origin_str, tz = "UTC")  # Default to UTC if not specified
+  }
+  
+  # Timereadable
   if (grepl("hours", time_units)) {
     time_readable <- time_origin + dhours(time)
   } else if (grepl("seconds", time_units)) {

From b1bd57fa514416e771f7381d6334cc0a25dc58ec Mon Sep 17 00:00:00 2001
From: Sambhav Dixit <94298612+sambhavnoobcoder@users.noreply.github.com>
Date: Tue, 30 Jul 2024 04:40:27 +0530
Subject: [PATCH 104/155] Update DESCRIPTION

updated with correct date and version number
---
 modules/assim.sequential/DESCRIPTION | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/modules/assim.sequential/DESCRIPTION b/modules/assim.sequential/DESCRIPTION
index 9a3857ee803..169ee82bc40 100644
--- a/modules/assim.sequential/DESCRIPTION
+++ b/modules/assim.sequential/DESCRIPTION
@@ -1,8 +1,7 @@
 Package: PEcAnAssimSequential
 Type: Package
 Title: PEcAn Functions Used for Ecological Forecasts and Reanalysis
-Version: 1.7.2
-Date: 2021-10-04
+Version: 1.8.0.9000
 Author: Mike Dietze
 Maintainer: Mike Dietze <dietze@bu.edu>
 Description: The Predictive Ecosystem Carbon Analyzer (PEcAn) is a scientific
@@ -33,6 +32,7 @@ Imports:
     stringr
 Suggests:
     corrplot,
+    exactextractr,
     ggrepel,
     emdbook,
     glue,
@@ -65,4 +65,4 @@ Suggests:
 License: BSD_3_clause + file LICENSE
 Copyright: Authors
 Encoding: UTF-8
-RoxygenNote: 7.3.1
+RoxygenNote: 7.3.2

From fa9ed0423725734ce9143ac7c8b3ba70583b9d47 Mon Sep 17 00:00:00 2001
From: Sambhav Dixit <94298612+sambhavnoobcoder@users.noreply.github.com>
Date: Tue, 30 Jul 2024 04:43:12 +0530
Subject: [PATCH 105/155] Update NAMESPACE

updated namespace after updated DESCRIPTION
---
 modules/assim.sequential/NAMESPACE | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/modules/assim.sequential/NAMESPACE b/modules/assim.sequential/NAMESPACE
index bb8aa415da4..5a69d8bd9ef 100644
--- a/modules/assim.sequential/NAMESPACE
+++ b/modules/assim.sequential/NAMESPACE
@@ -1,5 +1,6 @@
 # Generated by roxygen2: do not edit by hand
 
+S3method(outlier,detector.boxplot)
 export(Analysis.sda)
 export(Construct.H.multisite)
 export(Construct.R)
@@ -21,7 +22,6 @@ export(SDA_control)
 export(SDA_remote_launcher)
 export(SDA_timeseries_plot)
 export(adj.ens)
-export(aggregate)
 export(alltocs)
 export(alr)
 export(assessParams)
@@ -37,7 +37,6 @@ export(load_data_paleon_sda)
 export(matrix_network)
 export(metSplit)
 export(obs_timestep2timepoint)
-export(outlier.detector.boxplot)
 export(piecew.poly.local)
 export(post.analysis.ggplot)
 export(post.analysis.ggplot.violin)
@@ -56,7 +55,6 @@ export(sda_weights_site)
 export(simple.local)
 export(tobit.model)
 export(tobit2space.model)
-export(tobit_model_censored)
 export(y_star_create)
 import(furrr)
 import(lubridate)

From 0cf995a432807c2a46b308a5d5a027fcfc3e0871 Mon Sep 17 00:00:00 2001
From: Sambhav Dixit <94298612+sambhavnoobcoder@users.noreply.github.com>
Date: Tue, 30 Jul 2024 04:46:01 +0530
Subject: [PATCH 106/155] Update DESCRIPTION

updated the version number of roxygen
---
 modules/assim.sequential/DESCRIPTION | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/assim.sequential/DESCRIPTION b/modules/assim.sequential/DESCRIPTION
index 169ee82bc40..97eedfc06a3 100644
--- a/modules/assim.sequential/DESCRIPTION
+++ b/modules/assim.sequential/DESCRIPTION
@@ -65,4 +65,4 @@ Suggests:
 License: BSD_3_clause + file LICENSE
 Copyright: Authors
 Encoding: UTF-8
-RoxygenNote: 7.3.2
+RoxygenNote: 7.3.1

From 6686b8decba44727d9d4e8283a7b7b4bffa6e88e Mon Sep 17 00:00:00 2001
From: Sambhav Dixit <94298612+sambhavnoobcoder@users.noreply.github.com>
Date: Tue, 30 Jul 2024 04:53:19 +0530
Subject: [PATCH 107/155] Update NAMESPACE

NAMESPACES after running the DESCRIPTION changes to roxygen 7.3.1

From 7a66814dc7a686d2c1ca0fe0309483748459f15b Mon Sep 17 00:00:00 2001
From: Michael Dietze <dietze@bu.edu>
Date: Tue, 30 Jul 2024 09:39:38 -0400
Subject: [PATCH 108/155] Update NAMESPACE

---
 modules/assim.sequential/NAMESPACE | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/modules/assim.sequential/NAMESPACE b/modules/assim.sequential/NAMESPACE
index 5a69d8bd9ef..4564933c89a 100644
--- a/modules/assim.sequential/NAMESPACE
+++ b/modules/assim.sequential/NAMESPACE
@@ -1,7 +1,7 @@
 # Generated by roxygen2: do not edit by hand
 
-S3method(outlier,detector.boxplot)
 export(Analysis.sda)
+export(aggregate)
 export(Construct.H.multisite)
 export(Construct.R)
 export(Construct_H)
@@ -37,6 +37,7 @@ export(load_data_paleon_sda)
 export(matrix_network)
 export(metSplit)
 export(obs_timestep2timepoint)
+export(outlier.detector.boxplot)
 export(piecew.poly.local)
 export(post.analysis.ggplot)
 export(post.analysis.ggplot.violin)
@@ -55,6 +56,7 @@ export(sda_weights_site)
 export(simple.local)
 export(tobit.model)
 export(tobit2space.model)
+export(tobit_mocel_censored)
 export(y_star_create)
 import(furrr)
 import(lubridate)

From a25e6639e868f867435732642b91e02724dbdef3 Mon Sep 17 00:00:00 2001
From: Michael Dietze <dietze@bu.edu>
Date: Tue, 30 Jul 2024 09:41:40 -0400
Subject: [PATCH 109/155] Update NAMESPACE

---
 modules/assim.sequential/NAMESPACE | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/modules/assim.sequential/NAMESPACE b/modules/assim.sequential/NAMESPACE
index 4564933c89a..b31fd8320e8 100644
--- a/modules/assim.sequential/NAMESPACE
+++ b/modules/assim.sequential/NAMESPACE
@@ -1,7 +1,6 @@
 # Generated by roxygen2: do not edit by hand
 
 export(Analysis.sda)
-export(aggregate)
 export(Construct.H.multisite)
 export(Construct.R)
 export(Construct_H)
@@ -24,6 +23,7 @@ export(SDA_timeseries_plot)
 export(adj.ens)
 export(alltocs)
 export(alr)
+export(aggregate)
 export(assessParams)
 export(block_matrix)
 export(conj_wt_wishart_sampler)
@@ -56,7 +56,7 @@ export(sda_weights_site)
 export(simple.local)
 export(tobit.model)
 export(tobit2space.model)
-export(tobit_mocel_censored)
+export(tobit_model_censored)
 export(y_star_create)
 import(furrr)
 import(lubridate)

From b7ac5466ac40d9257687ce4491e9bd4fae2c4e46 Mon Sep 17 00:00:00 2001
From: Michael Dietze <dietze@bu.edu>
Date: Tue, 30 Jul 2024 09:42:14 -0400
Subject: [PATCH 110/155] Update NAMESPACE

---
 modules/assim.sequential/NAMESPACE | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/assim.sequential/NAMESPACE b/modules/assim.sequential/NAMESPACE
index b31fd8320e8..bb8aa415da4 100644
--- a/modules/assim.sequential/NAMESPACE
+++ b/modules/assim.sequential/NAMESPACE
@@ -21,9 +21,9 @@ export(SDA_control)
 export(SDA_remote_launcher)
 export(SDA_timeseries_plot)
 export(adj.ens)
+export(aggregate)
 export(alltocs)
 export(alr)
-export(aggregate)
 export(assessParams)
 export(block_matrix)
 export(conj_wt_wishart_sampler)

From b61fd4e2bd8fbc76c06f35c311b8082b3dc9fb7f Mon Sep 17 00:00:00 2001
From: Michael Dietze <dietze@bu.edu>
Date: Tue, 30 Jul 2024 09:43:21 -0400
Subject: [PATCH 111/155] Update modules/assim.sequential/DESCRIPTION

---
 modules/assim.sequential/DESCRIPTION | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/assim.sequential/DESCRIPTION b/modules/assim.sequential/DESCRIPTION
index 97eedfc06a3..169ee82bc40 100644
--- a/modules/assim.sequential/DESCRIPTION
+++ b/modules/assim.sequential/DESCRIPTION
@@ -65,4 +65,4 @@ Suggests:
 License: BSD_3_clause + file LICENSE
 Copyright: Authors
 Encoding: UTF-8
-RoxygenNote: 7.3.1
+RoxygenNote: 7.3.2

From f56fd9c6a4ca9dd56700f65ac25744a638ed09bf Mon Sep 17 00:00:00 2001
From: Harunobu Ishii <harunobuishii547@gmail.com>
Date: Wed, 31 Jul 2024 14:14:58 -0400
Subject: [PATCH 112/155] Updated downscale based on comment

---
 .../assim.sequential/R/downscale_function_hrly.R  | 15 +++++----------
 1 file changed, 5 insertions(+), 10 deletions(-)

diff --git a/modules/assim.sequential/R/downscale_function_hrly.R b/modules/assim.sequential/R/downscale_function_hrly.R
index 01f18904f00..70b09e6fc7d 100644
--- a/modules/assim.sequential/R/downscale_function_hrly.R
+++ b/modules/assim.sequential/R/downscale_function_hrly.R
@@ -8,14 +8,13 @@
 #' @param yyyy In string, format is yyyy(year of interest)
 #' @param covariates SpatRaster stack, used as predictors in randomForest. Layers within stack should be named. Recommended that this stack be generated using 'covariates' instructions in assim.sequential/inst folder
 #' @return It returns the `downscale_output` list containing lists for the training and testing data sets, models, and predicted maps for each ensemble member.
-#' @import ncdf4
-#' @import lubridate
 #' @export
 
 SDA_downscale_hrly <- function(nc_file, coords, yyyy, covariates){
   
   # Read the input data and site coordinates
-  nc_data <- nc_open(nc_file)
+  nc_data <- ncdf4::nc_open(nc_file)
+  on.exit(ncdf4::nc_close(nc_data))
   input_data <- ncvar_get(nc_data, "NEE")
   covariate_names <- names(covariates)
   
@@ -27,11 +26,11 @@ SDA_downscale_hrly <- function(nc_file, coords, yyyy, covariates){
   
   # Check if timezone is specified in the time units string
   if (grepl("UTC|GMT", time_units)) {
-    time_origin <- ymd_hm(time_origin_str, tz = "UTC")
+    time_origin <- lubridate::ymd_hm(time_origin_str, tz = "UTC")
   } else if (grepl("EST", time_units)) {
-    time_origin <- ymd_hm(time_origin_str, tz = "EST")
+    time_origin <- lubridate::ymd_hm(time_origin_str, tz = "EST")
   } else {
-    time_origin <- ymd_hm(time_origin_str, tz = "UTC")  # Default to UTC if not specified
+    time_origin <- lubridate::ymd_hm(time_origin_str, tz = "UTC")  # Default to UTC if not specified
   }
   
   # Timereadable
@@ -55,9 +54,6 @@ SDA_downscale_hrly <- function(nc_file, coords, yyyy, covariates){
   # Predict for each time stamp of the year selected
   time_indices <- which(year(time_readable) == yyyy)
   for (index in time_indices) {
-    if(index == 37986){
-      break
-    }
     data <- input_data[index, , ]
     carbon_data <- as.data.frame(data)
     names(carbon_data) <- paste0("ensemble",seq(1:ncol(carbon_data)))
@@ -103,6 +99,5 @@ SDA_downscale_hrly <- function(nc_file, coords, yyyy, covariates){
     
     downscale_output[[as.character(time_readable[index])]]<-curr_downscaled
   }
-  nc_close(nc_data)
   return(downscale_output)
 }

From 6893b40d9c2702294ef7c19f2169c62e1b96eb51 Mon Sep 17 00:00:00 2001
From: Chris Black <chris@ckblack.org>
Date: Wed, 31 Jul 2024 12:49:34 -0700
Subject: [PATCH 113/155] typo

---
 .github/workflows/depends.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/depends.yml b/.github/workflows/depends.yml
index 35070426540..7416d7486de 100644
--- a/.github/workflows/depends.yml
+++ b/.github/workflows/depends.yml
@@ -56,7 +56,7 @@ jobs:
           github.event_name == 'workflow_dispatch' ||
           (contains(fromJSON('["4.1", "4.2", "4.4"]'), matrix.R)
             && github.event.schedule == '0 0 * * *') ||
-          (contains(fromJSON('[4.3", "devel"]'), matrix.R)
+          (contains(fromJSON('["4.3", "devel"]'), matrix.R)
             && github.event.schedule == '30 1 * * 1')
         run: |
           BRANCH=${GITHUB_REF##*/}

From 6ca6b651cc2cd90dc78af01865738581edeb1014 Mon Sep 17 00:00:00 2001
From: Harunobu Ishii <harunobuishii547@gmail.com>
Date: Wed, 31 Jul 2024 17:04:42 -0400
Subject: [PATCH 114/155] name space added

---
 modules/assim.sequential/R/downscale_function_hrly.R | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/modules/assim.sequential/R/downscale_function_hrly.R b/modules/assim.sequential/R/downscale_function_hrly.R
index 70b09e6fc7d..25da4c62150 100644
--- a/modules/assim.sequential/R/downscale_function_hrly.R
+++ b/modules/assim.sequential/R/downscale_function_hrly.R
@@ -15,7 +15,7 @@ SDA_downscale_hrly <- function(nc_file, coords, yyyy, covariates){
   # Read the input data and site coordinates
   nc_data <- ncdf4::nc_open(nc_file)
   on.exit(ncdf4::nc_close(nc_data))
-  input_data <- ncvar_get(nc_data, "NEE")
+  input_data <- ncdf4::ncvar_get(nc_data, "NEE")
   covariate_names <- names(covariates)
   
   
@@ -35,9 +35,9 @@ SDA_downscale_hrly <- function(nc_file, coords, yyyy, covariates){
   
   # Timereadable
   if (grepl("hours", time_units)) {
-    time_readable <- time_origin + dhours(time)
+    time_readable <- time_origin + lubridate::dhours(time)
   } else if (grepl("seconds", time_units)) {
-    time_readable <- time_origin + dseconds(time)
+    time_readable <- time_origin + lubridate::dseconds(time)
   } else {
     stop("Unsupported time units")
   }

From 732b966e576b4954cd07136235c8740e84f86226 Mon Sep 17 00:00:00 2001
From: Sambhav Dixit <94298612+sambhavnoobcoder@users.noreply.github.com>
Date: Thu, 1 Aug 2024 02:55:30 +0530
Subject: [PATCH 115/155] Update pecan_package_dependencies.csv

In accordance with the new NAMESPACE and DESCRIPTION.
---
 docker/depends/pecan_package_dependencies.csv | 87 +++++++++----------
 1 file changed, 42 insertions(+), 45 deletions(-)

diff --git a/docker/depends/pecan_package_dependencies.csv b/docker/depends/pecan_package_dependencies.csv
index c4ff7caaa45..f8c54204a73 100644
--- a/docker/depends/pecan_package_dependencies.csv
+++ b/docker/depends/pecan_package_dependencies.csv
@@ -122,8 +122,7 @@
 "jsonlite","*","models/stics","Imports",FALSE
 "jsonlite","*","modules/data.atmosphere","Imports",FALSE
 "jsonlite","*","modules/data.remote","Suggests",FALSE
-"knitr","*","base/visualization","Suggests",FALSE
-"knitr","*","modules/data.atmosphere","Suggests",FALSE
+"keras3","*","modules/assim.sequential","Suggests",FALSE
 "knitr",">= 1.42","base/db","Suggests",FALSE
 "knitr",">= 1.42","base/qaqc","Suggests",FALSE
 "knitr",">= 1.42","modules/allometry","Suggests",FALSE
@@ -483,55 +482,54 @@
 "rlang","*","modules/uncertainty","Imports",FALSE
 "rlang",">= 0.2.0","modules/data.atmosphere","Imports",FALSE
 "rlist","*","modules/assim.sequential","Suggests",FALSE
-"rmarkdown","*","base/visualization","Suggests",FALSE
 "rmarkdown",">= 2.19","base/db","Suggests",FALSE
 "rmarkdown",">= 2.19","base/qaqc","Suggests",FALSE
 "rmarkdown",">= 2.19","modules/allometry","Suggests",FALSE
 "rmarkdown",">= 2.19","modules/assim.batch","Suggests",FALSE
 "rmarkdown",">= 2.19","modules/meta.analysis","Suggests",FALSE
 "rmarkdown",">= 2.19","modules/photosynthesis","Suggests",FALSE
-"roxygen2","== 7.3.2","base/all","Roxygen",FALSE
-"roxygen2","== 7.3.2","base/db","Roxygen",FALSE
-"roxygen2","== 7.3.2","base/logger","Roxygen",FALSE
-"roxygen2","== 7.3.2","base/qaqc","Roxygen",FALSE
-"roxygen2","== 7.3.2","base/remote","Roxygen",FALSE
-"roxygen2","== 7.3.2","base/settings","Roxygen",FALSE
-"roxygen2","== 7.3.2","base/utils","Roxygen",FALSE
-"roxygen2","== 7.3.2","base/visualization","Roxygen",FALSE
-"roxygen2","== 7.3.2","base/workflow","Roxygen",FALSE
-"roxygen2","== 7.3.2","models/basgra","Roxygen",FALSE
-"roxygen2","== 7.3.2","models/biocro","Roxygen",FALSE
-"roxygen2","== 7.3.2","models/cable","Roxygen",FALSE
-"roxygen2","== 7.3.2","models/clm45","Roxygen",FALSE
-"roxygen2","== 7.3.2","models/dalec","Roxygen",FALSE
-"roxygen2","== 7.3.2","models/dvmdostem","Roxygen",FALSE
-"roxygen2","== 7.3.2","models/ed","Roxygen",FALSE
-"roxygen2","== 7.3.2","models/fates","Roxygen",FALSE
-"roxygen2","== 7.3.2","models/gday","Roxygen",FALSE
-"roxygen2","== 7.3.2","models/jules","Roxygen",FALSE
-"roxygen2","== 7.3.2","models/ldndc","Roxygen",FALSE
-"roxygen2","== 7.3.2","models/linkages","Roxygen",FALSE
-"roxygen2","== 7.3.2","models/lpjguess","Roxygen",FALSE
-"roxygen2","== 7.3.2","models/maat","Roxygen",FALSE
-"roxygen2","== 7.3.2","models/maespa","Roxygen",FALSE
-"roxygen2","== 7.3.2","models/preles","Roxygen",FALSE
-"roxygen2","== 7.3.2","models/sibcasa","Roxygen",FALSE
-"roxygen2","== 7.3.2","models/sipnet","Roxygen",FALSE
-"roxygen2","== 7.3.2","models/stics","Roxygen",FALSE
-"roxygen2","== 7.3.2","models/template","Roxygen",FALSE
-"roxygen2","== 7.3.2","modules/allometry","Roxygen",FALSE
-"roxygen2","== 7.3.2","modules/assim.batch","Roxygen",FALSE
+"roxygen2","== 7.3.1","base/all","Roxygen",FALSE
+"roxygen2","== 7.3.1","base/db","Roxygen",FALSE
+"roxygen2","== 7.3.1","base/logger","Roxygen",FALSE
+"roxygen2","== 7.3.1","base/qaqc","Roxygen",FALSE
+"roxygen2","== 7.3.1","base/remote","Roxygen",FALSE
+"roxygen2","== 7.3.1","base/settings","Roxygen",FALSE
+"roxygen2","== 7.3.1","base/utils","Roxygen",FALSE
+"roxygen2","== 7.3.1","base/visualization","Roxygen",FALSE
+"roxygen2","== 7.3.1","base/workflow","Roxygen",FALSE
+"roxygen2","== 7.3.1","models/basgra","Roxygen",FALSE
+"roxygen2","== 7.3.1","models/biocro","Roxygen",FALSE
+"roxygen2","== 7.3.1","models/cable","Roxygen",FALSE
+"roxygen2","== 7.3.1","models/clm45","Roxygen",FALSE
+"roxygen2","== 7.3.1","models/dalec","Roxygen",FALSE
+"roxygen2","== 7.3.1","models/dvmdostem","Roxygen",FALSE
+"roxygen2","== 7.3.1","models/ed","Roxygen",FALSE
+"roxygen2","== 7.3.1","models/fates","Roxygen",FALSE
+"roxygen2","== 7.3.1","models/gday","Roxygen",FALSE
+"roxygen2","== 7.3.1","models/jules","Roxygen",FALSE
+"roxygen2","== 7.3.1","models/ldndc","Roxygen",FALSE
+"roxygen2","== 7.3.1","models/linkages","Roxygen",FALSE
+"roxygen2","== 7.3.1","models/lpjguess","Roxygen",FALSE
+"roxygen2","== 7.3.1","models/maat","Roxygen",FALSE
+"roxygen2","== 7.3.1","models/maespa","Roxygen",FALSE
+"roxygen2","== 7.3.1","models/preles","Roxygen",FALSE
+"roxygen2","== 7.3.1","models/sibcasa","Roxygen",FALSE
+"roxygen2","== 7.3.1","models/sipnet","Roxygen",FALSE
+"roxygen2","== 7.3.1","models/stics","Roxygen",FALSE
+"roxygen2","== 7.3.1","models/template","Roxygen",FALSE
+"roxygen2","== 7.3.1","modules/allometry","Roxygen",FALSE
+"roxygen2","== 7.3.1","modules/assim.batch","Roxygen",FALSE
+"roxygen2","== 7.3.1","modules/benchmark","Roxygen",FALSE
+"roxygen2","== 7.3.1","modules/data.atmosphere","Roxygen",FALSE
+"roxygen2","== 7.3.1","modules/data.land","Roxygen",FALSE
+"roxygen2","== 7.3.1","modules/data.remote","Roxygen",FALSE
+"roxygen2","== 7.3.1","modules/emulator","Roxygen",FALSE
+"roxygen2","== 7.3.1","modules/meta.analysis","Roxygen",FALSE
+"roxygen2","== 7.3.1","modules/photosynthesis","Roxygen",FALSE
+"roxygen2","== 7.3.1","modules/priors","Roxygen",FALSE
+"roxygen2","== 7.3.1","modules/rtm","Roxygen",FALSE
+"roxygen2","== 7.3.1","modules/uncertainty","Roxygen",FALSE
 "roxygen2","== 7.3.2","modules/assim.sequential","Roxygen",FALSE
-"roxygen2","== 7.3.2","modules/benchmark","Roxygen",FALSE
-"roxygen2","== 7.3.2","modules/data.atmosphere","Roxygen",FALSE
-"roxygen2","== 7.3.2","modules/data.land","Roxygen",FALSE
-"roxygen2","== 7.3.2","modules/data.remote","Roxygen",FALSE
-"roxygen2","== 7.3.2","modules/emulator","Roxygen",FALSE
-"roxygen2","== 7.3.2","modules/meta.analysis","Roxygen",FALSE
-"roxygen2","== 7.3.2","modules/photosynthesis","Roxygen",FALSE
-"roxygen2","== 7.3.2","modules/priors","Roxygen",FALSE
-"roxygen2","== 7.3.2","modules/rtm","Roxygen",FALSE
-"roxygen2","== 7.3.2","modules/uncertainty","Roxygen",FALSE
 "RPostgres","*","base/db","Suggests",FALSE
 "RPostgreSQL","*","base/db","Suggests",FALSE
 "RPostgreSQL","*","models/biocro","Suggests",FALSE
@@ -612,7 +610,6 @@
 "testthat",">= 3.0.4","base/qaqc","Suggests",FALSE
 "tibble","*","base/db","Imports",FALSE
 "tibble","*","models/ed","Imports",FALSE
-"tibble","*","models/fates","Imports",FALSE
 "tibble","*","models/lpjguess","Imports",FALSE
 "tibble","*","modules/data.atmosphere","Imports",FALSE
 "tibble","*","modules/data.remote","Suggests",FALSE

From 6389e93c9b717ff5f3ca0314300115af4dd79aa0 Mon Sep 17 00:00:00 2001
From: Sambhav Dixit <94298612+sambhavnoobcoder@users.noreply.github.com>
Date: Thu, 1 Aug 2024 03:41:40 +0530
Subject: [PATCH 116/155] Update pecan_package_dependencies.csv

Full dependencies after reinstall. Last results were 4/13.
---
 docker/depends/pecan_package_dependencies.csv | 86 ++++++++++---------
 1 file changed, 45 insertions(+), 41 deletions(-)

diff --git a/docker/depends/pecan_package_dependencies.csv b/docker/depends/pecan_package_dependencies.csv
index f8c54204a73..2a4a14dfe13 100644
--- a/docker/depends/pecan_package_dependencies.csv
+++ b/docker/depends/pecan_package_dependencies.csv
@@ -123,6 +123,8 @@
 "jsonlite","*","modules/data.atmosphere","Imports",FALSE
 "jsonlite","*","modules/data.remote","Suggests",FALSE
 "keras3","*","modules/assim.sequential","Suggests",FALSE
+"knitr","*","base/visualization","Suggests",FALSE
+"knitr","*","modules/data.atmosphere","Suggests",FALSE
 "knitr",">= 1.42","base/db","Suggests",FALSE
 "knitr",">= 1.42","base/qaqc","Suggests",FALSE
 "knitr",">= 1.42","modules/allometry","Suggests",FALSE
@@ -482,54 +484,55 @@
 "rlang","*","modules/uncertainty","Imports",FALSE
 "rlang",">= 0.2.0","modules/data.atmosphere","Imports",FALSE
 "rlist","*","modules/assim.sequential","Suggests",FALSE
+"rmarkdown","*","base/visualization","Suggests",FALSE
 "rmarkdown",">= 2.19","base/db","Suggests",FALSE
 "rmarkdown",">= 2.19","base/qaqc","Suggests",FALSE
 "rmarkdown",">= 2.19","modules/allometry","Suggests",FALSE
 "rmarkdown",">= 2.19","modules/assim.batch","Suggests",FALSE
 "rmarkdown",">= 2.19","modules/meta.analysis","Suggests",FALSE
 "rmarkdown",">= 2.19","modules/photosynthesis","Suggests",FALSE
-"roxygen2","== 7.3.1","base/all","Roxygen",FALSE
-"roxygen2","== 7.3.1","base/db","Roxygen",FALSE
-"roxygen2","== 7.3.1","base/logger","Roxygen",FALSE
-"roxygen2","== 7.3.1","base/qaqc","Roxygen",FALSE
-"roxygen2","== 7.3.1","base/remote","Roxygen",FALSE
-"roxygen2","== 7.3.1","base/settings","Roxygen",FALSE
-"roxygen2","== 7.3.1","base/utils","Roxygen",FALSE
-"roxygen2","== 7.3.1","base/visualization","Roxygen",FALSE
-"roxygen2","== 7.3.1","base/workflow","Roxygen",FALSE
-"roxygen2","== 7.3.1","models/basgra","Roxygen",FALSE
-"roxygen2","== 7.3.1","models/biocro","Roxygen",FALSE
-"roxygen2","== 7.3.1","models/cable","Roxygen",FALSE
-"roxygen2","== 7.3.1","models/clm45","Roxygen",FALSE
-"roxygen2","== 7.3.1","models/dalec","Roxygen",FALSE
-"roxygen2","== 7.3.1","models/dvmdostem","Roxygen",FALSE
-"roxygen2","== 7.3.1","models/ed","Roxygen",FALSE
-"roxygen2","== 7.3.1","models/fates","Roxygen",FALSE
-"roxygen2","== 7.3.1","models/gday","Roxygen",FALSE
-"roxygen2","== 7.3.1","models/jules","Roxygen",FALSE
-"roxygen2","== 7.3.1","models/ldndc","Roxygen",FALSE
-"roxygen2","== 7.3.1","models/linkages","Roxygen",FALSE
-"roxygen2","== 7.3.1","models/lpjguess","Roxygen",FALSE
-"roxygen2","== 7.3.1","models/maat","Roxygen",FALSE
-"roxygen2","== 7.3.1","models/maespa","Roxygen",FALSE
-"roxygen2","== 7.3.1","models/preles","Roxygen",FALSE
-"roxygen2","== 7.3.1","models/sibcasa","Roxygen",FALSE
-"roxygen2","== 7.3.1","models/sipnet","Roxygen",FALSE
-"roxygen2","== 7.3.1","models/stics","Roxygen",FALSE
-"roxygen2","== 7.3.1","models/template","Roxygen",FALSE
-"roxygen2","== 7.3.1","modules/allometry","Roxygen",FALSE
-"roxygen2","== 7.3.1","modules/assim.batch","Roxygen",FALSE
-"roxygen2","== 7.3.1","modules/benchmark","Roxygen",FALSE
-"roxygen2","== 7.3.1","modules/data.atmosphere","Roxygen",FALSE
-"roxygen2","== 7.3.1","modules/data.land","Roxygen",FALSE
-"roxygen2","== 7.3.1","modules/data.remote","Roxygen",FALSE
-"roxygen2","== 7.3.1","modules/emulator","Roxygen",FALSE
-"roxygen2","== 7.3.1","modules/meta.analysis","Roxygen",FALSE
-"roxygen2","== 7.3.1","modules/photosynthesis","Roxygen",FALSE
-"roxygen2","== 7.3.1","modules/priors","Roxygen",FALSE
-"roxygen2","== 7.3.1","modules/rtm","Roxygen",FALSE
-"roxygen2","== 7.3.1","modules/uncertainty","Roxygen",FALSE
+"roxygen2","== 7.3.2","base/all","Roxygen",FALSE
+"roxygen2","== 7.3.2","base/db","Roxygen",FALSE
+"roxygen2","== 7.3.2","base/logger","Roxygen",FALSE
+"roxygen2","== 7.3.2","base/qaqc","Roxygen",FALSE
+"roxygen2","== 7.3.2","base/remote","Roxygen",FALSE
+"roxygen2","== 7.3.2","base/settings","Roxygen",FALSE
+"roxygen2","== 7.3.2","base/utils","Roxygen",FALSE
+"roxygen2","== 7.3.2","base/visualization","Roxygen",FALSE
+"roxygen2","== 7.3.2","base/workflow","Roxygen",FALSE
+"roxygen2","== 7.3.2","models/basgra","Roxygen",FALSE
+"roxygen2","== 7.3.2","models/biocro","Roxygen",FALSE
+"roxygen2","== 7.3.2","models/cable","Roxygen",FALSE
+"roxygen2","== 7.3.2","models/clm45","Roxygen",FALSE
+"roxygen2","== 7.3.2","models/dalec","Roxygen",FALSE
+"roxygen2","== 7.3.2","models/dvmdostem","Roxygen",FALSE
+"roxygen2","== 7.3.2","models/ed","Roxygen",FALSE
+"roxygen2","== 7.3.2","models/fates","Roxygen",FALSE
+"roxygen2","== 7.3.2","models/gday","Roxygen",FALSE
+"roxygen2","== 7.3.2","models/jules","Roxygen",FALSE
+"roxygen2","== 7.3.2","models/ldndc","Roxygen",FALSE
+"roxygen2","== 7.3.2","models/linkages","Roxygen",FALSE
+"roxygen2","== 7.3.2","models/lpjguess","Roxygen",FALSE
+"roxygen2","== 7.3.2","models/maat","Roxygen",FALSE
+"roxygen2","== 7.3.2","models/maespa","Roxygen",FALSE
+"roxygen2","== 7.3.2","models/preles","Roxygen",FALSE
+"roxygen2","== 7.3.2","models/sibcasa","Roxygen",FALSE
+"roxygen2","== 7.3.2","models/sipnet","Roxygen",FALSE
+"roxygen2","== 7.3.2","models/stics","Roxygen",FALSE
+"roxygen2","== 7.3.2","models/template","Roxygen",FALSE
+"roxygen2","== 7.3.2","modules/allometry","Roxygen",FALSE
+"roxygen2","== 7.3.2","modules/assim.batch","Roxygen",FALSE
 "roxygen2","== 7.3.2","modules/assim.sequential","Roxygen",FALSE
+"roxygen2","== 7.3.2","modules/benchmark","Roxygen",FALSE
+"roxygen2","== 7.3.2","modules/data.atmosphere","Roxygen",FALSE
+"roxygen2","== 7.3.2","modules/data.land","Roxygen",FALSE
+"roxygen2","== 7.3.2","modules/data.remote","Roxygen",FALSE
+"roxygen2","== 7.3.2","modules/emulator","Roxygen",FALSE
+"roxygen2","== 7.3.2","modules/meta.analysis","Roxygen",FALSE
+"roxygen2","== 7.3.2","modules/photosynthesis","Roxygen",FALSE
+"roxygen2","== 7.3.2","modules/priors","Roxygen",FALSE
+"roxygen2","== 7.3.2","modules/rtm","Roxygen",FALSE
+"roxygen2","== 7.3.2","modules/uncertainty","Roxygen",FALSE
 "RPostgres","*","base/db","Suggests",FALSE
 "RPostgreSQL","*","base/db","Suggests",FALSE
 "RPostgreSQL","*","models/biocro","Suggests",FALSE
@@ -610,6 +613,7 @@
 "testthat",">= 3.0.4","base/qaqc","Suggests",FALSE
 "tibble","*","base/db","Imports",FALSE
 "tibble","*","models/ed","Imports",FALSE
+"tibble","*","models/fates","Imports",FALSE
 "tibble","*","models/lpjguess","Imports",FALSE
 "tibble","*","modules/data.atmosphere","Imports",FALSE
 "tibble","*","modules/data.remote","Suggests",FALSE

From 91ef69ffeef224a21ee5dd5af6dd562038db7f47 Mon Sep 17 00:00:00 2001
From: Michael Dietze <dietze@bu.edu>
Date: Thu, 1 Aug 2024 11:37:50 -0400
Subject: [PATCH 117/155] Update modules/assim.sequential/DESCRIPTION

---
 modules/assim.sequential/DESCRIPTION | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/assim.sequential/DESCRIPTION b/modules/assim.sequential/DESCRIPTION
index 169ee82bc40..de07e4734b4 100644
--- a/modules/assim.sequential/DESCRIPTION
+++ b/modules/assim.sequential/DESCRIPTION
@@ -48,7 +48,7 @@ Suggests:
     plotrix,
     plyr (>= 1.8.4),
     randomForest,
-    keras3,
+    keras3 (>=1.0.0),
     raster,
     readr,
     reshape2 (>= 1.4.2),

From 1d215b7b9f1ec4d4daf3d7c31bba9dbac1ba3f79 Mon Sep 17 00:00:00 2001
From: Michael Dietze <dietze@bu.edu>
Date: Thu, 1 Aug 2024 11:50:50 -0400
Subject: [PATCH 118/155] Update modules/assim.sequential/DESCRIPTION

---
 modules/assim.sequential/DESCRIPTION | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/assim.sequential/DESCRIPTION b/modules/assim.sequential/DESCRIPTION
index de07e4734b4..dfa454384cd 100644
--- a/modules/assim.sequential/DESCRIPTION
+++ b/modules/assim.sequential/DESCRIPTION
@@ -48,7 +48,7 @@ Suggests:
     plotrix,
     plyr (>= 1.8.4),
     randomForest,
-    keras3 (>=1.0.0),
+    keras3 (>= 1.0.0),
     raster,
     readr,
     reshape2 (>= 1.4.2),

From 7b9ec87c7745df182eb1de134ef4b5e4429f5217 Mon Sep 17 00:00:00 2001
From: Michael Dietze <dietze@bu.edu>
Date: Thu, 1 Aug 2024 12:03:32 -0400
Subject: [PATCH 119/155] Update docker/depends/pecan_package_dependencies.csv

---
 docker/depends/pecan_package_dependencies.csv | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docker/depends/pecan_package_dependencies.csv b/docker/depends/pecan_package_dependencies.csv
index 2a4a14dfe13..99600415862 100644
--- a/docker/depends/pecan_package_dependencies.csv
+++ b/docker/depends/pecan_package_dependencies.csv
@@ -122,7 +122,7 @@
 "jsonlite","*","models/stics","Imports",FALSE
 "jsonlite","*","modules/data.atmosphere","Imports",FALSE
 "jsonlite","*","modules/data.remote","Suggests",FALSE
-"keras3","*","modules/assim.sequential","Suggests",FALSE
+"keras3",">= 1.0.0","modules/assim.sequential","Suggests",FALSE
 "knitr","*","base/visualization","Suggests",FALSE
 "knitr","*","modules/data.atmosphere","Suggests",FALSE
 "knitr",">= 1.42","base/db","Suggests",FALSE

From 59140ce8cd53b853f36c28c3f5ddc057fb04165e Mon Sep 17 00:00:00 2001
From: Chris Black <chris@ckblack.org>
Date: Thu, 1 Aug 2024 15:36:05 -0700
Subject: [PATCH 120/155] Update
 modules/meta.analysis/tests/testthat/test.jagify.R

---
 modules/meta.analysis/tests/testthat/test.jagify.R | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/modules/meta.analysis/tests/testthat/test.jagify.R b/modules/meta.analysis/tests/testthat/test.jagify.R
index 7541783f18b..50949a7a5d7 100644
--- a/modules/meta.analysis/tests/testthat/test.jagify.R
+++ b/modules/meta.analysis/tests/testthat/test.jagify.R
@@ -1,11 +1,3 @@
-#-------------------------------------------------------------------------------
-# Copyright (c) 2012 University of Illinois, NCSA.
-# All rights reserved. This program and the accompanying materials
-# are made available under the terms of the 
-# University of Illinois/NCSA Open Source License
-# which accompanies this distribution, and is available at
-# http://opensource.ncsa.illinois.edu/license.html
-#-------------------------------------------------------------------------------
 
 test_that("jagify correctly assigns treatment index of 1 to all control treatments, regardless of alphabetical order", {
   ## generate test data; controls assigned to early alphabet and late alphabet trt names

From ee746f2e73bcb6929f6389695b05dd3060478402 Mon Sep 17 00:00:00 2001
From: Chris Black <chris@ckblack.org>
Date: Sat, 3 Aug 2024 01:17:37 -0700
Subject: [PATCH 121/155] make a named volume for the Bety logs mountpoint

Best I can tell nothing gets written to this, but because the Bety image
specifies this path as a VOLUME, we were creating a new anonymous-but-permanent
volume every time Compose spun up
---
 docker-compose.yml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/docker-compose.yml b/docker-compose.yml
index 349e85cca7b..92af1a03a95 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -79,6 +79,8 @@ services:
       - SECRET_KEY_BASE=${BETY_SECRET_KEY:-notasecret}
       - RAILS_RELATIVE_URL_ROOT=/bety
       - LOCAL_SERVER=${BETY_LOCAL_SERVER:-99}
+    volumes:
+      - bety:/home/bety/log
     depends_on:
       - postgres
     labels:
@@ -346,6 +348,7 @@ networks:
 volumes:
   traefik:
   postgres:
+  bety:
   rabbitmq:
   pecan:
   rstudio:

From 0a8767fa04b63b9225e25f4342343c7aa72d1c7d Mon Sep 17 00:00:00 2001
From: Chris Black <chris@ckblack.org>
Date: Sat, 3 Aug 2024 01:25:05 -0700
Subject: [PATCH 122/155] prod too

---
 docker-compose.prod.yml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/docker-compose.prod.yml b/docker-compose.prod.yml
index 92343d2c63a..6de05893cf8 100644
--- a/docker-compose.prod.yml
+++ b/docker-compose.prod.yml
@@ -81,6 +81,8 @@ services:
       - SECRET_KEY_BASE=${BETY_SECRET_KEY:-notasecret}
       - RAILS_RELATIVE_URL_ROOT=/bety
       - LOCAL_SERVER=${BETY_LOCAL_SERVER:-99}
+    volumes:
+      - bety:/home/bety/log
     depends_on:
       - postgres
     labels:
@@ -279,6 +281,7 @@ networks:
 volumes:
   traefik:
   postgres:
+  bety:
   rabbitmq:
   pecan:
   rstudio:

From 258a021f3a623a6c3f1f95713851ce5f4293ff64 Mon Sep 17 00:00:00 2001
From: Chris Black <chris@ckblack.org>
Date: Tue, 6 Aug 2024 11:22:04 -0700
Subject: [PATCH 123/155] update links to sipnet repo

---
 models/sipnet/model_info.json | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/models/sipnet/model_info.json b/models/sipnet/model_info.json
index 7c0bca184cc..f24749928ab 100644
--- a/models/sipnet/model_info.json
+++ b/models/sipnet/model_info.json
@@ -7,9 +7,9 @@
   "creator": "Rob Kooper <kooper@illinois.edu>",
   "contributors": [],
   "links": {
-    "source": "http://someurl/code",
-    "issues": "http://someurl/issues",
-    "documentation": "http://someurl/wiki"
+    "source": "https://github.com/PecanProject/sipnet",
+    "issues": "https://github.com/PecanProject/sipnet/issues",
+    "documentation": "https://github.com/PecanProject/sipnet/tree/master/docs"
   },
   "inputs": {},
   "bibtex": []

From 85c304a244bb7750bdba05d322d456ccd2540d95 Mon Sep 17 00:00:00 2001
From: Chris Black <chris@ckblack.org>
Date: Wed, 7 Aug 2024 22:25:26 -0700
Subject: [PATCH 124/155] Update NAMESPACE
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Leftover from removal of Roxygen `@import` in f56fd9c6 -- not sure why the tree-is-clean checks didn't catch this before merging 🤷
---
 modules/assim.sequential/NAMESPACE | 1 -
 1 file changed, 1 deletion(-)

diff --git a/modules/assim.sequential/NAMESPACE b/modules/assim.sequential/NAMESPACE
index 1ec9253385a..db21f07876e 100644
--- a/modules/assim.sequential/NAMESPACE
+++ b/modules/assim.sequential/NAMESPACE
@@ -61,7 +61,6 @@ export(tobit_model_censored)
 export(y_star_create)
 import(furrr)
 import(lubridate)
-import(ncdf4)
 import(nimble)
 importFrom(dplyr,"%>%")
 importFrom(lubridate,"%m+%")

From 4e83a3121a4f83d86efabec05c2b39a798beebaa Mon Sep 17 00:00:00 2001
From: Chris Black <chris@ckblack.org>
Date: Fri, 9 Aug 2024 15:59:32 -0700
Subject: [PATCH 125/155] remove stray empty file

Fairly sure I have deleted this at least once before; don't know why it keeps coming back.
---
 scripts/Makefile.depends | 1 -
 1 file changed, 1 deletion(-)
 delete mode 100644 scripts/Makefile.depends

diff --git a/scripts/Makefile.depends b/scripts/Makefile.depends
deleted file mode 100644
index adac6e7c09d..00000000000
--- a/scripts/Makefile.depends
+++ /dev/null
@@ -1 +0,0 @@
-# autogenerated

From db03e0e9960b033fe958681300e3398ddaa8f747 Mon Sep 17 00:00:00 2001
From: Sambhav Dixit <94298612+sambhavnoobcoder@users.noreply.github.com>
Date: Sat, 10 Aug 2024 16:51:28 +0530
Subject: [PATCH 126/155] Add batch normalization to CNN model

- Inserted batch normalization layers after convolutional and dense layers
- Aims to improve model stability and performance
- May help reduce internal covariate shift and allow for higher learning rates
---
 modules/assim.sequential/R/downscale_function.R | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/modules/assim.sequential/R/downscale_function.R b/modules/assim.sequential/R/downscale_function.R
index 30280dab8d9..2e0eb13924a 100644
--- a/modules/assim.sequential/R/downscale_function.R
+++ b/modules/assim.sequential/R/downscale_function.R
@@ -146,8 +146,10 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ
     for (i in seq_along(carbon_data)) {
       model <- keras3::keras_model_sequential() |>
         keras3::layer_conv_1d(filters = 64, kernel_size = 1, activation = 'relu', input_shape = c(1, length(covariate_names))) |>
+        keras3::layer_batch_normalization() |>
         keras3::layer_flatten() |>
         keras3::layer_dense(units = 64, activation = 'relu') |>
+        keras3::layer_batch_normalization() |>
         keras3::layer_dense(units = 1)
       
       model |> keras3::compile(

From 499ee2d0d3644661535fee767e9e99d673079e46 Mon Sep 17 00:00:00 2001
From: Sambhav Dixit <94298612+sambhavnoobcoder@users.noreply.github.com>
Date: Sat, 10 Aug 2024 17:01:38 +0530
Subject: [PATCH 127/155] Add dropout layers to CNN model

- Inserted dropout layers after batch normalization in convolutional and dense layers
- Set dropout rate to 0.3
- Aims to reduce overfitting and improve generalization
- May enhance model robustness and performance on unseen data
---
 modules/assim.sequential/R/downscale_function.R | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/modules/assim.sequential/R/downscale_function.R b/modules/assim.sequential/R/downscale_function.R
index 2e0eb13924a..9c26343e728 100644
--- a/modules/assim.sequential/R/downscale_function.R
+++ b/modules/assim.sequential/R/downscale_function.R
@@ -147,9 +147,11 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ
       model <- keras3::keras_model_sequential() |>
         keras3::layer_conv_1d(filters = 64, kernel_size = 1, activation = 'relu', input_shape = c(1, length(covariate_names))) |>
         keras3::layer_batch_normalization() |>
+        keras3::layer_dropout(rate = 0.3) |>
         keras3::layer_flatten() |>
         keras3::layer_dense(units = 64, activation = 'relu') |>
         keras3::layer_batch_normalization() |>
+        keras3::layer_dropout(rate = 0.3) |>
         keras3::layer_dense(units = 1)
       
       model |> keras3::compile(

From cabc159ec364f7b26907734d6dc94f7803179ca7 Mon Sep 17 00:00:00 2001
From: Sambhav Dixit <94298612+sambhavnoobcoder@users.noreply.github.com>
Date: Sat, 10 Aug 2024 17:05:28 +0530
Subject: [PATCH 128/155] Add exponential decay learning rate scheduler

- Implemented learning rate scheduler using exponential decay
- Initial learning rate set to 0.001
- Decay steps: 1000, decay rate: 0.9
- Aims to improve training stability and convergence
- May help fine-tune model performance over training epochs
---
 modules/assim.sequential/R/downscale_function.R | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/modules/assim.sequential/R/downscale_function.R b/modules/assim.sequential/R/downscale_function.R
index 9c26343e728..d56ce02cfe0 100644
--- a/modules/assim.sequential/R/downscale_function.R
+++ b/modules/assim.sequential/R/downscale_function.R
@@ -154,9 +154,16 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ
         keras3::layer_dropout(rate = 0.3) |>
         keras3::layer_dense(units = 1)
       
+      # Learning rate scheduler
+      lr_schedule <- keras3::learning_rate_schedule_exponential_decay(
+        initial_learning_rate = 0.001,
+        decay_steps = 1000,
+        decay_rate = 0.9
+      )
+      
       model |> keras3::compile(
         loss = 'mean_squared_error',
-        optimizer = keras3::optimizer_adam(),
+        optimizer = keras3::optimizer_adam(learning_rate = lr_schedule),
         metrics = c('mean_absolute_error')
       )
       

From b9c5d5337c4a6b7fba374931170a2326ec2bf74e Mon Sep 17 00:00:00 2001
From: Sambhav Dixit <94298612+sambhavnoobcoder@users.noreply.github.com>
Date: Sat, 10 Aug 2024 17:08:02 +0530
Subject: [PATCH 129/155] Implement early stopping in CNN model

- Added early stopping callback
- Monitor: validation loss
- Patience: 10 epochs
- Restore best weights: True
- Aims to prevent overfitting and optimize training duration
- May improve model generalization and reduce unnecessary computation
---
 modules/assim.sequential/R/downscale_function.R | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/modules/assim.sequential/R/downscale_function.R b/modules/assim.sequential/R/downscale_function.R
index d56ce02cfe0..c49061f0c0a 100644
--- a/modules/assim.sequential/R/downscale_function.R
+++ b/modules/assim.sequential/R/downscale_function.R
@@ -166,6 +166,13 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ
         optimizer = keras3::optimizer_adam(learning_rate = lr_schedule),
         metrics = c('mean_absolute_error')
       )
+
+      # Early stopping callback
+      early_stopping <- keras3::callback_early_stopping(
+        monitor = 'val_loss',
+        patience = 10,
+        restore_best_weights = TRUE
+      )
       
       model |> keras3::fit(
         x = x_train,
@@ -173,6 +180,7 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ
         epochs = 100,
         batch_size = 32,
         validation_split = 0.2,
+        callbacks = list(early_stopping),
         verbose = 0
       )
       

From ce26566190ff69ed628f3033744e0649b01a7ba4 Mon Sep 17 00:00:00 2001
From: Sambhav Dixit <94298612+sambhavnoobcoder@users.noreply.github.com>
Date: Sat, 10 Aug 2024 17:10:25 +0530
Subject: [PATCH 130/155] Increase maximum number of epochs in CNN model

- Raised max epochs from 100 to 500
- Allows for potentially longer training time
- Aims to give model more opportunity to learn complex patterns
- Works in conjunction with early stopping for optimal training duration
- May lead to improved model performance and accuracy
---
 modules/assim.sequential/R/downscale_function.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/assim.sequential/R/downscale_function.R b/modules/assim.sequential/R/downscale_function.R
index c49061f0c0a..5212724cc9b 100644
--- a/modules/assim.sequential/R/downscale_function.R
+++ b/modules/assim.sequential/R/downscale_function.R
@@ -177,7 +177,7 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ
       model |> keras3::fit(
         x = x_train,
         y = y_train[, i],
-        epochs = 100,
+        epochs = 500,  # Increased max epochs
         batch_size = 32,
         validation_split = 0.2,
         callbacks = list(early_stopping),

From ac0461d14a7accc8afdd1acb842f49846e0abe1d Mon Sep 17 00:00:00 2001
From: Sambhav Dixit <94298612+sambhavnoobcoder@users.noreply.github.com>
Date: Sat, 10 Aug 2024 17:12:29 +0530
Subject: [PATCH 131/155] Final refactor

Made some final refactoring changes to keep the code standardised and consistently formatted.
---
 modules/assim.sequential/R/downscale_function.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/assim.sequential/R/downscale_function.R b/modules/assim.sequential/R/downscale_function.R
index 5212724cc9b..2cd7f282214 100644
--- a/modules/assim.sequential/R/downscale_function.R
+++ b/modules/assim.sequential/R/downscale_function.R
@@ -166,7 +166,7 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ
         optimizer = keras3::optimizer_adam(learning_rate = lr_schedule),
         metrics = c('mean_absolute_error')
       )
-
+      
       # Early stopping callback
       early_stopping <- keras3::callback_early_stopping(
         monitor = 'val_loss',

From d2069e0e36ab4b8a2604ab9629dae89dc4c06b2f Mon Sep 17 00:00:00 2001
From: Sambhav Dixit <94298612+sambhavnoobcoder@users.noreply.github.com>
Date: Sun, 11 Aug 2024 13:11:36 +0530
Subject: [PATCH 132/155] Added comments to architecture

Added short, concise comments to the model architecture to make the design choices and the reasoning behind them self-explanatory.
---
 modules/assim.sequential/R/downscale_function.R | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/modules/assim.sequential/R/downscale_function.R b/modules/assim.sequential/R/downscale_function.R
index 2cd7f282214..64661b88d95 100644
--- a/modules/assim.sequential/R/downscale_function.R
+++ b/modules/assim.sequential/R/downscale_function.R
@@ -140,18 +140,28 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ
       predictions[[i]] <- stats::predict(models[[i]], test_data)
     }
   } else if (model_type == "cnn") {
+    # Reshape input data for CNN
     x_train <- keras3::array_reshape(x_train, c(nrow(x_train), 1, ncol(x_train)))
     x_test <- keras3::array_reshape(x_test, c(nrow(x_test), 1, ncol(x_test)))
     
     for (i in seq_along(carbon_data)) {
+      # Define the CNN model architecture
       model <- keras3::keras_model_sequential() |>
+        # 1D Convolutional layer: Extracts local features from input data
         keras3::layer_conv_1d(filters = 64, kernel_size = 1, activation = 'relu', input_shape = c(1, length(covariate_names))) |>
+        # Batch normalization: Normalizes layer inputs, stabilizes learning, reduces internal covariate shift
         keras3::layer_batch_normalization() |>
+        # Dropout: Randomly sets 30% of inputs to 0, reducing overfitting and improving generalization
         keras3::layer_dropout(rate = 0.3) |>
+        # Flatten: Converts 3D output to 1D for dense layer input
         keras3::layer_flatten() |>
+        # Dense layer: Learns complex combinations of features
         keras3::layer_dense(units = 64, activation = 'relu') |>
+        # Second batch normalization: Further stabilizes learning in deeper layers
         keras3::layer_batch_normalization() |>
+        # Second dropout: Additional regularization to prevent overfitting in final layers
         keras3::layer_dropout(rate = 0.3) |>
+        # Output layer: Single neuron for regression prediction
         keras3::layer_dense(units = 1)
       
       # Learning rate scheduler

From e2cee084281f0f9d981d5ba125305c4a90cdccef Mon Sep 17 00:00:00 2001
From: Sambhav Dixit <94298612+sambhavnoobcoder@users.noreply.github.com>
Date: Sun, 11 Aug 2024 13:23:46 +0530
Subject: [PATCH 133/155] comment for CNN predictions

Added comments above the CNN prediction snippet.
---
 modules/assim.sequential/R/downscale_function.R | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/modules/assim.sequential/R/downscale_function.R b/modules/assim.sequential/R/downscale_function.R
index 64661b88d95..777d3566b45 100644
--- a/modules/assim.sequential/R/downscale_function.R
+++ b/modules/assim.sequential/R/downscale_function.R
@@ -195,7 +195,8 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ
       )
       
       models[[i]] <- model
-      
+
+      #CNN predictions
       cnn_predict <- function(model, newdata, scaling_params) {
         newdata <- scale(newdata, center = scaling_params$mean, scale = scaling_params$sd)
         newdata <- keras3::array_reshape(newdata, c(nrow(newdata), 1, ncol(newdata)))

From ba4196d8efa9144976be3795f6efc8983fe0a196 Mon Sep 17 00:00:00 2001
From: Sambhav Dixit <94298612+sambhavnoobcoder@users.noreply.github.com>
Date: Sun, 11 Aug 2024 13:26:24 +0530
Subject: [PATCH 134/155] More code comments

Added more comments for clarity.
---
 modules/assim.sequential/R/downscale_function.R | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/modules/assim.sequential/R/downscale_function.R b/modules/assim.sequential/R/downscale_function.R
index 777d3566b45..d4974bab387 100644
--- a/modules/assim.sequential/R/downscale_function.R
+++ b/modules/assim.sequential/R/downscale_function.R
@@ -146,12 +146,13 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ
     
     for (i in seq_along(carbon_data)) {
       # Define the CNN model architecture
+      # Used dual batch normalization and dropout as the first set of batch normalization and dropout operates on the lower-level features extracted by the convolutional layer, the second set works on the higher-level features learned by the dense layer.
       model <- keras3::keras_model_sequential() |>
         # 1D Convolutional layer: Extracts local features from input data
         keras3::layer_conv_1d(filters = 64, kernel_size = 1, activation = 'relu', input_shape = c(1, length(covariate_names))) |>
         # Batch normalization: Normalizes layer inputs, stabilizes learning, reduces internal covariate shift
         keras3::layer_batch_normalization() |>
-        # Dropout: Randomly sets 30% of inputs to 0, reducing overfitting and improving generalization
+        # Dropout: Randomly sets some of inputs to 0, reducing overfitting and improving generalization
         keras3::layer_dropout(rate = 0.3) |>
         # Flatten: Converts 3D output to 1D for dense layer input
         keras3::layer_flatten() |>

From 8fdb1d5444ba4d4ac66ea3a5f9ee58325f394a4a Mon Sep 17 00:00:00 2001
From: Sambhav Dixit <94298612+sambhavnoobcoder@users.noreply.github.com>
Date: Sun, 11 Aug 2024 13:29:19 +0530
Subject: [PATCH 135/155] yet more comments

This should cover every place in the CNN code where a comment is needed; all the code snippets are now commented.
---
 modules/assim.sequential/R/downscale_function.R | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/modules/assim.sequential/R/downscale_function.R b/modules/assim.sequential/R/downscale_function.R
index d4974bab387..27645f9dd79 100644
--- a/modules/assim.sequential/R/downscale_function.R
+++ b/modules/assim.sequential/R/downscale_function.R
@@ -171,7 +171,8 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ
         decay_steps = 1000,
         decay_rate = 0.9
       )
-      
+
+      # Compile the model
       model |> keras3::compile(
         loss = 'mean_squared_error',
         optimizer = keras3::optimizer_adam(learning_rate = lr_schedule),
@@ -184,7 +185,8 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ
         patience = 10,
         restore_best_weights = TRUE
       )
-      
+
+      # Train the model
       model |> keras3::fit(
         x = x_train,
         y = y_train[, i],
@@ -194,7 +196,8 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ
         callbacks = list(early_stopping),
         verbose = 0
       )
-      
+
+      # Store the trained model
       models[[i]] <- model
 
       #CNN predictions
@@ -204,12 +207,16 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ
         predictions <- stats::predict(model, newdata)
         return(as.vector(predictions))
       }
-      
+
+      # Create a prediction raster from covariates
       prediction_rast <- terra::rast(covariates)
+      
+      # Generate spatial predictions using the trained model
       maps[[i]] <- terra::predict(prediction_rast, model = models[[i]],
                                   fun = cnn_predict,
                                   scaling_params = scaling_params)
-      
+
+      # Make predictions on held-out test data
       predictions[[i]] <- cnn_predict(models[[i]], x_data[-sample, ], scaling_params)
     }
   } else {

From 25fd5b6254cb55d9d983bb80108b7e78b91d14ce Mon Sep 17 00:00:00 2001
From: Sambhav Dixit <94298612+sambhavnoobcoder@users.noreply.github.com>
Date: Tue, 13 Aug 2024 16:43:03 +0530
Subject: [PATCH 136/155] Update downscale_function.R

Add L2 regularization to CNN model in SDA_downscale function

- Introduced L2 kernel regularization to convolutional and dense layers
- Set L2 factor to 0.01 for all regularized layers
- Aim to reduce overfitting and improve model generalization
- Updated model architecture in the CNN branch of SDA_downscale function
---
 modules/assim.sequential/R/downscale_function.R | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/modules/assim.sequential/R/downscale_function.R b/modules/assim.sequential/R/downscale_function.R
index 27645f9dd79..99677466ecc 100644
--- a/modules/assim.sequential/R/downscale_function.R
+++ b/modules/assim.sequential/R/downscale_function.R
@@ -145,11 +145,13 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ
     x_test <- keras3::array_reshape(x_test, c(nrow(x_test), 1, ncol(x_test)))
     
     for (i in seq_along(carbon_data)) {
+      # L2 regularization factor
+      l2_factor <- 0.01
       # Define the CNN model architecture
       # Used dual batch normalization and dropout as the first set of batch normalization and dropout operates on the lower-level features extracted by the convolutional layer, the second set works on the higher-level features learned by the dense layer.
       model <- keras3::keras_model_sequential() |>
         # 1D Convolutional layer: Extracts local features from input data
-        keras3::layer_conv_1d(filters = 64, kernel_size = 1, activation = 'relu', input_shape = c(1, length(covariate_names))) |>
+        keras3::layer_conv_1d(filters = 64, kernel_size = 1, activation = 'relu', input_shape = c(1, length(covariate_names)) , kernel_regularizer = keras3::regularizer_l2(l2_factor)) |>
         # Batch normalization: Normalizes layer inputs, stabilizes learning, reduces internal covariate shift
         keras3::layer_batch_normalization() |>
         # Dropout: Randomly sets some of inputs to 0, reducing overfitting and improving generalization
@@ -157,13 +159,13 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ
         # Flatten: Converts 3D output to 1D for dense layer input
         keras3::layer_flatten() |>
         # Dense layer: Learns complex combinations of features
-        keras3::layer_dense(units = 64, activation = 'relu') |>
+        keras3::layer_dense(units = 64, activation = 'relu' , kernel_regularizer = keras3::regularizer_l2(l2_factor) ) |>
         # Second batch normalization: Further stabilizes learning in deeper layers
         keras3::layer_batch_normalization() |>
         # Second dropout: Additional regularization to prevent overfitting in final layers
         keras3::layer_dropout(rate = 0.3) |>
         # Output layer: Single neuron for regression prediction
-        keras3::layer_dense(units = 1)
+        keras3::layer_dense(units = 1 , kernel_regularizer = keras3::regularizer_l2(l2_factor) )
       
       # Learning rate scheduler
       lr_schedule <- keras3::learning_rate_schedule_exponential_decay(

From d4696e6083ed5167609e8a238c5d75faabdec562 Mon Sep 17 00:00:00 2001
From: Sambhav Dixit <94298612+sambhavnoobcoder@users.noreply.github.com>
Date: Tue, 13 Aug 2024 17:52:40 +0530
Subject: [PATCH 137/155] Define k-folds and create cross-validation indices

- Set k_folds variable to 5 for k-fold cross-validation
- Use caret::createFolds to generate fold indices for training data
---
 modules/assim.sequential/R/downscale_function.R | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/modules/assim.sequential/R/downscale_function.R b/modules/assim.sequential/R/downscale_function.R
index 99677466ecc..fede48920a7 100644
--- a/modules/assim.sequential/R/downscale_function.R
+++ b/modules/assim.sequential/R/downscale_function.R
@@ -140,9 +140,15 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ
       predictions[[i]] <- stats::predict(models[[i]], test_data)
     }
   } else if (model_type == "cnn") {
+    # Define k_folds within the function
+    k_folds <- 5
+    
     # Reshape input data for CNN
     x_train <- keras3::array_reshape(x_train, c(nrow(x_train), 1, ncol(x_train)))
     x_test <- keras3::array_reshape(x_test, c(nrow(x_test), 1, ncol(x_test)))
+
+    # Create k-fold indices for cross-validation (only on training data)
+    fold_indices <- caret::createFolds(y = 1:nrow(x_train), k = k_folds, list = TRUE, returnTrain = FALSE)
     
     for (i in seq_along(carbon_data)) {
       # L2 regularization factor

From 6a34666c849f70b22e9f2d5623c542c19ee02679 Mon Sep 17 00:00:00 2001
From: Sambhav Dixit <94298612+sambhavnoobcoder@users.noreply.github.com>
Date: Tue, 13 Aug 2024 17:59:04 +0530
Subject: [PATCH 138/155] Implement cross-validation loop for ensemble modeling

- Initialize cv_results list to store results
- Create loop to process each fold
- Split data into training and validation sets for each fold
- Print progress message for each fold
---
 modules/assim.sequential/R/downscale_function.R | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/modules/assim.sequential/R/downscale_function.R b/modules/assim.sequential/R/downscale_function.R
index fede48920a7..311a90794bc 100644
--- a/modules/assim.sequential/R/downscale_function.R
+++ b/modules/assim.sequential/R/downscale_function.R
@@ -151,6 +151,20 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ
     fold_indices <- caret::createFolds(y = 1:nrow(x_train), k = k_folds, list = TRUE, returnTrain = FALSE)
     
     for (i in seq_along(carbon_data)) {
+
+    cv_results <- list()
+    
+    for (fold in 1:k_folds) {
+      cat(sprintf("Processing ensemble %d, fold %d of %d\n", i, fold, k_folds))
+      
+      # Split training data into training and validation sets for this fold
+      val_indices <- fold_indices[[fold]]
+      train_indices <- setdiff(1:nrow(x_train), val_indices)
+      
+      x_train_fold <- x_train[train_indices, , drop = FALSE]
+      y_train_fold <- y_train[train_indices, i]
+      x_val_fold <- x_train[val_indices, , drop = FALSE]
+      y_val_fold <- y_train[val_indices, i]
       # L2 regularization factor
       l2_factor <- 0.01
       # Define the CNN model architecture

From 4b16839a3d826745c917b8478cfba1ec559b5fda Mon Sep 17 00:00:00 2001
From: Sambhav Dixit <94298612+sambhavnoobcoder@users.noreply.github.com>
Date: Tue, 13 Aug 2024 18:02:56 +0530
Subject: [PATCH 139/155] Implement model training and evaluation within
 cross-validation

- Use tryCatch for error handling during model fitting
- Evaluate model on validation set
- Store results in cv_results list
- Handle potential errors and log them
---
 .../assim.sequential/R/downscale_function.R   | 28 ++++++++++++-------
 1 file changed, 18 insertions(+), 10 deletions(-)

diff --git a/modules/assim.sequential/R/downscale_function.R b/modules/assim.sequential/R/downscale_function.R
index 311a90794bc..61612948ff9 100644
--- a/modules/assim.sequential/R/downscale_function.R
+++ b/modules/assim.sequential/R/downscale_function.R
@@ -208,16 +208,24 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ
         restore_best_weights = TRUE
       )
 
-      # Train the model
-      model |> keras3::fit(
-        x = x_train,
-        y = y_train[, i],
-        epochs = 500,  # Increased max epochs
-        batch_size = 32,
-        validation_split = 0.2,
-        callbacks = list(early_stopping),
-        verbose = 0
-      )
+      tryCatch({
+          model |> keras3::fit(
+            x = x_train_fold,
+            y = y_train_fold,
+            epochs = 500,
+            batch_size = 32,
+            callbacks = list(early_stopping),
+            verbose = 0
+          )
+          
+          # Evaluate model on validation set
+          val_results <- model |> keras3::evaluate(x_val_fold, y_val_fold, verbose = 0)
+          cv_results[[fold]] <- val_results
+        }, error = function(e) {
+          cat("Error in fold", fold, ":", conditionMessage(e), "\n")
+          cv_results[[fold]] <- c(NA, NA)
+        })
+      }
 
       # Store the trained model
       models[[i]] <- model

From b22c84e4b598877fddd6f6c14bc20874ce411bde Mon Sep 17 00:00:00 2001
From: Sambhav Dixit <94298612+sambhavnoobcoder@users.noreply.github.com>
Date: Tue, 13 Aug 2024 18:06:49 +0530
Subject: [PATCH 140/155] Calculate and display average cross-validation
 performance

- Compute mean MSE and MAE across all folds
- Use sapply for efficient calculation
- Handle potential NA values
- Print results for the current ensemble
---
 modules/assim.sequential/R/downscale_function.R | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/modules/assim.sequential/R/downscale_function.R b/modules/assim.sequential/R/downscale_function.R
index 61612948ff9..b4f71a662d4 100644
--- a/modules/assim.sequential/R/downscale_function.R
+++ b/modules/assim.sequential/R/downscale_function.R
@@ -227,6 +227,12 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ
         })
       }
 
+      # Calculate average performance across folds
+      mean_mse <- mean(sapply(cv_results, function(x) x[1]), na.rm = TRUE)
+      mean_mae <- mean(sapply(cv_results, function(x) x[2]), na.rm = TRUE)
+      
+      cat(sprintf("Ensemble %d - Mean MSE: %.4f, Mean MAE: %.4f\n", i, mean_mse, mean_mae))
+
       # Store the trained model
       models[[i]] <- model
 

From dce5d953d56a1006141ce06df1e26ca0e41a6b5c Mon Sep 17 00:00:00 2001
From: Sambhav Dixit <94298612+sambhavnoobcoder@users.noreply.github.com>
Date: Tue, 13 Aug 2024 18:26:03 +0530
Subject: [PATCH 141/155] Implement bagging for CNN in SDA_downscale function

- Add num_bags parameter to control number of bagged models
- Create and train multiple CNN models for each ensemble
- Implement bagged prediction function for CNN
- Update prediction process to use bagged models
- Adjust cross-validation to incorporate bagging
---
 .../assim.sequential/R/downscale_function.R   | 138 +++++++++---------
 1 file changed, 73 insertions(+), 65 deletions(-)

diff --git a/modules/assim.sequential/R/downscale_function.R b/modules/assim.sequential/R/downscale_function.R
index b4f71a662d4..f58b51763e6 100644
--- a/modules/assim.sequential/R/downscale_function.R
+++ b/modules/assim.sequential/R/downscale_function.R
@@ -167,74 +167,82 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ
       y_val_fold <- y_train[val_indices, i]
       # L2 regularization factor
       l2_factor <- 0.01
-      # Define the CNN model architecture
-      # Used dual batch normalization and dropout as the first set of batch normalization and dropout operates on the lower-level features extracted by the convolutional layer, the second set works on the higher-level features learned by the dense layer.
-      model <- keras3::keras_model_sequential() |>
-        # 1D Convolutional layer: Extracts local features from input data
-        keras3::layer_conv_1d(filters = 64, kernel_size = 1, activation = 'relu', input_shape = c(1, length(covariate_names)) , kernel_regularizer = keras3::regularizer_l2(l2_factor)) |>
-        # Batch normalization: Normalizes layer inputs, stabilizes learning, reduces internal covariate shift
-        keras3::layer_batch_normalization() |>
-        # Dropout: Randomly sets some of inputs to 0, reducing overfitting and improving generalization
-        keras3::layer_dropout(rate = 0.3) |>
-        # Flatten: Converts 3D output to 1D for dense layer input
-        keras3::layer_flatten() |>
-        # Dense layer: Learns complex combinations of features
-        keras3::layer_dense(units = 64, activation = 'relu' , kernel_regularizer = keras3::regularizer_l2(l2_factor) ) |>
-        # Second batch normalization: Further stabilizes learning in deeper layers
-        keras3::layer_batch_normalization() |>
-        # Second dropout: Additional regularization to prevent overfitting in final layers
-        keras3::layer_dropout(rate = 0.3) |>
-        # Output layer: Single neuron for regression prediction
-        keras3::layer_dense(units = 1 , kernel_regularizer = keras3::regularizer_l2(l2_factor) )
-      
-      # Learning rate scheduler
-      lr_schedule <- keras3::learning_rate_schedule_exponential_decay(
-        initial_learning_rate = 0.001,
-        decay_steps = 1000,
-        decay_rate = 0.9
-      )
-
-      # Compile the model
-      model |> keras3::compile(
-        loss = 'mean_squared_error',
-        optimizer = keras3::optimizer_adam(learning_rate = lr_schedule),
-        metrics = c('mean_absolute_error')
-      )
-      
-      # Early stopping callback
-      early_stopping <- keras3::callback_early_stopping(
-        monitor = 'val_loss',
-        patience = 10,
-        restore_best_weights = TRUE
-      )
-
-      tryCatch({
-          model |> keras3::fit(
-            x = x_train_fold,
-            y = y_train_fold,
-            epochs = 500,
-            batch_size = 32,
-            callbacks = list(early_stopping),
-            verbose = 0
-          )
-          
-          # Evaluate model on validation set
-          val_results <- model |> keras3::evaluate(x_val_fold, y_val_fold, verbose = 0)
-          cv_results[[fold]] <- val_results
-        }, error = function(e) {
-          cat("Error in fold", fold, ":", conditionMessage(e), "\n")
-          cv_results[[fold]] <- c(NA, NA)
-        })
+      # Train final bagged models on all training data
+      final_bagged_models <- list()
+      for (bag in 1:num_bags) {
+        bootstrap_indices <- sample(1:nrow(x_train), size = nrow(x_train), replace = TRUE)
+        x_train_bag <- x_train[bootstrap_indices, ]
+        y_train_bag <- y_train[bootstrap_indices, i]
+        # Define the CNN model architecture
+        # Used dual batch normalization and dropout as the first set of batch normalization and dropout operates on the lower-level features extracted by the convolutional layer, the second set works on the higher-level features learned by the dense layer.
+        model <- keras3::keras_model_sequential() |>
+          # 1D Convolutional layer: Extracts local features from input data
+          keras3::layer_conv_1d(filters = 64, kernel_size = 1, activation = 'relu', input_shape = c(1, length(covariate_names)) , kernel_regularizer = keras3::regularizer_l2(l2_factor)) |>
+          # Batch normalization: Normalizes layer inputs, stabilizes learning, reduces internal covariate shift
+          keras3::layer_batch_normalization() |>
+          # Dropout: Randomly sets some of inputs to 0, reducing overfitting and improving generalization
+          keras3::layer_dropout(rate = 0.3) |>
+          # Flatten: Converts 3D output to 1D for dense layer input
+          keras3::layer_flatten() |>
+          # Dense layer: Learns complex combinations of features
+          keras3::layer_dense(units = 64, activation = 'relu' , kernel_regularizer = keras3::regularizer_l2(l2_factor) ) |>
+          # Second batch normalization: Further stabilizes learning in deeper layers
+          keras3::layer_batch_normalization() |>
+          # Second dropout: Additional regularization to prevent overfitting in final layers
+          keras3::layer_dropout(rate = 0.3) |>
+          # Output layer: Single neuron for regression prediction
+          keras3::layer_dense(units = 1 , kernel_regularizer = keras3::regularizer_l2(l2_factor) )
+        
+        # Learning rate scheduler
+        lr_schedule <- keras3::learning_rate_schedule_exponential_decay(
+          initial_learning_rate = 0.001,
+          decay_steps = 1000,
+          decay_rate = 0.9
+        )
+  
+        # Compile the model
+        model |> keras3::compile(
+          loss = 'mean_squared_error',
+          optimizer = keras3::optimizer_adam(learning_rate = lr_schedule),
+          metrics = c('mean_absolute_error')
+        )
+        
+        # Early stopping callback
+        early_stopping <- keras3::callback_early_stopping(
+          monitor = 'val_loss',
+          patience = 10,
+          restore_best_weights = TRUE
+        )
+  
+        tryCatch({
+            model |> keras3::fit(
+              x = x_train_fold,
+              y = y_train_fold,
+              epochs = 500,
+              batch_size = 32,
+              callbacks = list(early_stopping),
+              verbose = 0
+            )
+        final_bagged_models[[bag]] <- final_model
+            
+            # Evaluate model on validation set
+            val_results <- model |> keras3::evaluate(x_val_fold, y_val_fold, verbose = 0)
+            cv_results[[fold]] <- val_results
+          }, error = function(e) {
+            cat("Error in fold", fold, ":", conditionMessage(e), "\n")
+            cv_results[[fold]] <- c(NA, NA)
+          })
+        }
+  
+        # Calculate average performance across folds
+        mean_mse <- mean(sapply(cv_results, function(x) x[1]), na.rm = TRUE)
+        mean_mae <- mean(sapply(cv_results, function(x) x[2]), na.rm = TRUE)
+        
+        cat(sprintf("Ensemble %d - Mean MSE: %.4f, Mean MAE: %.4f\n", i, mean_mse, mean_mae))
       }
 
-      # Calculate average performance across folds
-      mean_mse <- mean(sapply(cv_results, function(x) x[1]), na.rm = TRUE)
-      mean_mae <- mean(sapply(cv_results, function(x) x[2]), na.rm = TRUE)
-      
-      cat(sprintf("Ensemble %d - Mean MSE: %.4f, Mean MAE: %.4f\n", i, mean_mse, mean_mae))
-
       # Store the trained model
-      models[[i]] <- model
+      models[[i]] <- final_bagged_models
 
       #CNN predictions
       cnn_predict <- function(model, newdata, scaling_params) {

From e95536c965345fdfc1f0208d0865159e357e0fc2 Mon Sep 17 00:00:00 2001
From: Sambhav Dixit <94298612+sambhavnoobcoder@users.noreply.github.com>
Date: Tue, 13 Aug 2024 18:33:12 +0530
Subject: [PATCH 142/155] Improve bagged CNN evaluation and standardize
 nomenclature

- Add final test set evaluation for each bagged CNN ensemble
- Calculate and display Test MSE and MAE for ensembles
- Standardize variable names for clarity, to suit the flow of the code, and to avoid naming conflicts
---
 modules/assim.sequential/R/downscale_function.R | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/modules/assim.sequential/R/downscale_function.R b/modules/assim.sequential/R/downscale_function.R
index f58b51763e6..259ea304018 100644
--- a/modules/assim.sequential/R/downscale_function.R
+++ b/modules/assim.sequential/R/downscale_function.R
@@ -256,12 +256,18 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ
       prediction_rast <- terra::rast(covariates)
       
       # Generate spatial predictions using the trained model
-      maps[[i]] <- terra::predict(prediction_rast, model = models[[i]],
+      maps[[i]] <- terra::predict(prediction_rast, model = final_bagged_models,
                                   fun = cnn_predict,
                                   scaling_params = scaling_params)
 
       # Make predictions on held-out test data
-      predictions[[i]] <- cnn_predict(models[[i]], x_data[-sample, ], scaling_params)
+      predictions[[i]] <- cnn_predict(final_bagged_models, x_data[-sample, ], scaling_params)
+
+      # Evaluate final bagged ensemble on test set
+      test_predictions <- cnn_predict(final_bagged_models, x_test, scaling_params)
+      test_mse <- mean((test_predictions - y_test[, i])^2)
+      test_mae <- mean(abs(test_predictions - y_test[, i]))
+      cat(sprintf("Ensemble %d - Test MSE: %.4f, Test MAE: %.4f\n", i, test_mse, test_mae))
     }
   } else {
     stop("Invalid model_type. Please choose either 'rf' for Random Forest or 'cnn' for Convolutional Neural Network.")

From 85065566c9633d086ee6f18f58cf3a28cd209059 Mon Sep 17 00:00:00 2001
From: Sambhav Dixit <94298612+sambhavnoobcoder@users.noreply.github.com>
Date: Tue, 13 Aug 2024 18:37:18 +0530
Subject: [PATCH 143/155] Added number of bags for bagging

- Define num_bags inside the function instead of passing it as a function argument
- Set the number of bags to 5
---
 modules/assim.sequential/R/downscale_function.R | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/modules/assim.sequential/R/downscale_function.R b/modules/assim.sequential/R/downscale_function.R
index 259ea304018..8a5504f672b 100644
--- a/modules/assim.sequential/R/downscale_function.R
+++ b/modules/assim.sequential/R/downscale_function.R
@@ -142,6 +142,8 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ
   } else if (model_type == "cnn") {
     # Define k_folds within the function
     k_folds <- 5
+    #Number of bags 
+    num_bags <- 5
     
     # Reshape input data for CNN
     x_train <- keras3::array_reshape(x_train, c(nrow(x_train), 1, ncol(x_train)))

From ced7975bc97c31ea54ad1f5e1a1120ef5d12554c Mon Sep 17 00:00:00 2001
From: Sambhav Dixit <94298612+sambhavnoobcoder@users.noreply.github.com>
Date: Tue, 13 Aug 2024 19:28:45 +0530
Subject: [PATCH 144/155] Refactor CNN model with k-fold cross-validation and
 bagging

- Implement k-fold cross-validation for CNN models
- Add bagging to improve model robustness
- Restructure model creation and training process
- Introduce ensemble prediction function
- Update evaluation metrics for each fold and final ensemble
- Improve code organization and readability
---
 .../assim.sequential/R/downscale_function.R   | 216 +++++++++---------
 1 file changed, 105 insertions(+), 111 deletions(-)

diff --git a/modules/assim.sequential/R/downscale_function.R b/modules/assim.sequential/R/downscale_function.R
index 8a5504f672b..3838d1e3e0a 100644
--- a/modules/assim.sequential/R/downscale_function.R
+++ b/modules/assim.sequential/R/downscale_function.R
@@ -140,133 +140,127 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ
       predictions[[i]] <- stats::predict(models[[i]], test_data)
     }
   } else if (model_type == "cnn") {
-    # Define k_folds within the function
+        # Define k_folds and num_bags
     k_folds <- 5
-    #Number of bags 
     num_bags <- 5
     
-    # Reshape input data for CNN
-    x_train <- keras3::array_reshape(x_train, c(nrow(x_train), 1, ncol(x_train)))
-    x_test <- keras3::array_reshape(x_test, c(nrow(x_test), 1, ncol(x_test)))
-
-    # Create k-fold indices for cross-validation (only on training data)
-    fold_indices <- caret::createFolds(y = 1:nrow(x_train), k = k_folds, list = TRUE, returnTrain = FALSE)
-    
     for (i in seq_along(carbon_data)) {
-
-    cv_results <- list()
-    
-    for (fold in 1:k_folds) {
-      cat(sprintf("Processing ensemble %d, fold %d of %d\n", i, fold, k_folds))
+      all_models <- list()
       
-      # Split training data into training and validation sets for this fold
-      val_indices <- fold_indices[[fold]]
-      train_indices <- setdiff(1:nrow(x_train), val_indices)
+      # Create k-fold indices
+      fold_indices <- caret::createFolds(y = 1:nrow(x_train), k = k_folds, list = TRUE, returnTrain = FALSE)
       
-      x_train_fold <- x_train[train_indices, , drop = FALSE]
-      y_train_fold <- y_train[train_indices, i]
-      x_val_fold <- x_train[val_indices, , drop = FALSE]
-      y_val_fold <- y_train[val_indices, i]
-      # L2 regularization factor
-      l2_factor <- 0.01
-      # Train final bagged models on all training data
-      final_bagged_models <- list()
-      for (bag in 1:num_bags) {
-        bootstrap_indices <- sample(1:nrow(x_train), size = nrow(x_train), replace = TRUE)
-        x_train_bag <- x_train[bootstrap_indices, ]
-        y_train_bag <- y_train[bootstrap_indices, i]
-        # Define the CNN model architecture
-        # Used dual batch normalization and dropout as the first set of batch normalization and dropout operates on the lower-level features extracted by the convolutional layer, the second set works on the higher-level features learned by the dense layer.
-        model <- keras3::keras_model_sequential() |>
-          # 1D Convolutional layer: Extracts local features from input data
-          keras3::layer_conv_1d(filters = 64, kernel_size = 1, activation = 'relu', input_shape = c(1, length(covariate_names)) , kernel_regularizer = keras3::regularizer_l2(l2_factor)) |>
-          # Batch normalization: Normalizes layer inputs, stabilizes learning, reduces internal covariate shift
-          keras3::layer_batch_normalization() |>
-          # Dropout: Randomly sets some of inputs to 0, reducing overfitting and improving generalization
-          keras3::layer_dropout(rate = 0.3) |>
-          # Flatten: Converts 3D output to 1D for dense layer input
-          keras3::layer_flatten() |>
-          # Dense layer: Learns complex combinations of features
-          keras3::layer_dense(units = 64, activation = 'relu' , kernel_regularizer = keras3::regularizer_l2(l2_factor) ) |>
-          # Second batch normalization: Further stabilizes learning in deeper layers
-          keras3::layer_batch_normalization() |>
-          # Second dropout: Additional regularization to prevent overfitting in final layers
-          keras3::layer_dropout(rate = 0.3) |>
-          # Output layer: Single neuron for regression prediction
-          keras3::layer_dense(units = 1 , kernel_regularizer = keras3::regularizer_l2(l2_factor) )
+      for (fold in 1:k_folds) {
+        cat(sprintf("Processing ensemble %d, fold %d of %d\n", i, fold, k_folds))
         
-        # Learning rate scheduler
-        lr_schedule <- keras3::learning_rate_schedule_exponential_decay(
-          initial_learning_rate = 0.001,
-          decay_steps = 1000,
-          decay_rate = 0.9
-        )
-  
-        # Compile the model
-        model |> keras3::compile(
-          loss = 'mean_squared_error',
-          optimizer = keras3::optimizer_adam(learning_rate = lr_schedule),
-          metrics = c('mean_absolute_error')
-        )
+        # Split data into training and validation sets for this fold
+        train_indices <- setdiff(1:nrow(x_train), fold_indices[[fold]])
+        val_indices <- fold_indices[[fold]]
         
-        # Early stopping callback
-        early_stopping <- keras3::callback_early_stopping(
-          monitor = 'val_loss',
-          patience = 10,
-          restore_best_weights = TRUE
-        )
-  
-        tryCatch({
-            model |> keras3::fit(
-              x = x_train_fold,
-              y = y_train_fold,
-              epochs = 500,
-              batch_size = 32,
-              callbacks = list(early_stopping),
-              verbose = 0
+        x_train_fold <- x_train[train_indices, , drop = FALSE]
+        y_train_fold <- y_train[train_indices, i]
+        x_val_fold <- x_train[val_indices, , drop = FALSE]
+        y_val_fold <- y_train[val_indices, i]
+        
+        # Create bagged models for this fold
+        fold_models <- list()
+        for (bag in 1:num_bags) {
+          # Create bootstrap sample
+          bootstrap_indices <- sample(1:nrow(x_train_fold), size = nrow(x_train_fold), replace = TRUE)
+          x_train_bag <- x_train_fold[bootstrap_indices, ]
+          y_train_bag <- y_train_fold[bootstrap_indices]
+          
+          # Create and train model
+          model <- keras3::keras_model_sequential() |>
+            keras3::layer_reshape(target_shape = c(ncol(x_train), 1, 1), input_shape = ncol(x_train)) |>
+            keras3::layer_conv_2d(
+              filters = 32,
+              kernel_size = c(3, 1),
+              activation = 'relu',
+              padding = 'same'
+            ) |>
+            keras3::layer_flatten() |>
+            keras3::layer_dense(
+              units = 64, 
+              activation = 'relu',
+              kernel_regularizer = keras3::regularizer_l2(0.01)
+            ) |>
+            keras3::layer_batch_normalization() |>
+            keras3::layer_dropout(rate = 0.3) |>
+            keras3::layer_dense(
+              units = 32, 
+              activation = 'relu',
+              kernel_regularizer = keras3::regularizer_l2(0.01)
+            ) |>
+            keras3::layer_batch_normalization() |>
+            keras3::layer_dropout(rate = 0.3) |>
+            keras3::layer_dense(
+              units = 1,
+              kernel_regularizer = keras3::regularizer_l2(0.01)
             )
-        final_bagged_models[[bag]] <- final_model
-            
-            # Evaluate model on validation set
-            val_results <- model |> keras3::evaluate(x_val_fold, y_val_fold, verbose = 0)
-            cv_results[[fold]] <- val_results
-          }, error = function(e) {
-            cat("Error in fold", fold, ":", conditionMessage(e), "\n")
-            cv_results[[fold]] <- c(NA, NA)
-          })
+          
+          # Learning rate scheduler
+          lr_schedule <- keras3::learning_rate_schedule_exponential_decay(
+            initial_learning_rate = 0.001,
+            decay_steps = 1000,
+            decay_rate = 0.9
+          )
+          
+          # Early stopping callback
+          early_stopping <- keras3::callback_early_stopping(
+            monitor = 'loss',
+            patience = 10,
+            restore_best_weights = TRUE
+          )
+          
+          model |> keras3::compile(
+            loss = 'mean_squared_error',
+            optimizer = keras3::optimizer_adam(learning_rate = lr_schedule),
+            metrics = c('mean_absolute_error')
+          )
+          
+          model |> keras3::fit(
+            x = x_train_bag,
+            y = y_train_bag,
+            epochs = 500,
+            batch_size = 32,
+            callbacks = list(early_stopping),
+            verbose = 0
+          )
+          
+          fold_models[[bag]] <- model
         }
-  
-        # Calculate average performance across folds
-        mean_mse <- mean(sapply(cv_results, function(x) x[1]), na.rm = TRUE)
-        mean_mae <- mean(sapply(cv_results, function(x) x[2]), na.rm = TRUE)
         
-        cat(sprintf("Ensemble %d - Mean MSE: %.4f, Mean MAE: %.4f\n", i, mean_mse, mean_mae))
+        # Add fold models to all_models list
+        all_models <- c(all_models, fold_models)
+        
+        # Evaluate fold performance
+        val_predictions <- sapply(fold_models, function(m) stats::predict(m, x_val_fold))
+        val_predictions_mean <- rowMeans(val_predictions)
+        val_mse <- mean((val_predictions_mean - y_val_fold)^2)
+        val_mae <- mean(abs(val_predictions_mean - y_val_fold))
+        cat(sprintf("Fold %d - MSE: %.4f, MAE: %.4f\n", fold, val_mse, val_mae))
       }
-
-      # Store the trained model
-      models[[i]] <- final_bagged_models
-
-      #CNN predictions
-      cnn_predict <- function(model, newdata, scaling_params) {
+      
+      # Store all models for this ensemble
+      models[[i]] <- all_models
+      
+      # Use all models for predictions
+      cnn_ensemble_predict <- function(models, newdata, scaling_params) {
         newdata <- scale(newdata, center = scaling_params$mean, scale = scaling_params$sd)
-        newdata <- keras3::array_reshape(newdata, c(nrow(newdata), 1, ncol(newdata)))
-        predictions <- stats::predict(model, newdata)
-        return(as.vector(predictions))
+        predictions <- sapply(models, function(m) stats::predict(m, newdata))
+        return(rowMeans(predictions))
       }
-
-      # Create a prediction raster from covariates
-      prediction_rast <- terra::rast(covariates)
       
-      # Generate spatial predictions using the trained model
-      maps[[i]] <- terra::predict(prediction_rast, model = final_bagged_models,
-                                  fun = cnn_predict,
+      prediction_rast <- terra::rast(covariates)
+      maps[[i]] <- terra::predict(prediction_rast, model = models[[i]],
+                                  fun = cnn_ensemble_predict,
                                   scaling_params = scaling_params)
-
-      # Make predictions on held-out test data
-      predictions[[i]] <- cnn_predict(final_bagged_models, x_data[-sample, ], scaling_params)
-
-      # Evaluate final bagged ensemble on test set
-      test_predictions <- cnn_predict(final_bagged_models, x_test, scaling_params)
+      
+      predictions[[i]] <- cnn_ensemble_predict(models[[i]], x_data[-sample, ], scaling_params)
+      
+      # Evaluate final ensemble on test set
+      test_predictions <- cnn_ensemble_predict(models[[i]], x_test, scaling_params)
       test_mse <- mean((test_predictions - y_test[, i])^2)
       test_mae <- mean(abs(test_predictions - y_test[, i]))
       cat(sprintf("Ensemble %d - Test MSE: %.4f, Test MAE: %.4f\n", i, test_mse, test_mae))

From 871d78ee53df2ba34111ea1a00b3d5fad5fca0d4 Mon Sep 17 00:00:00 2001
From: Sambhav Dixit <94298612+sambhavnoobcoder@users.noreply.github.com>
Date: Tue, 13 Aug 2024 19:46:05 +0530
Subject: [PATCH 145/155] Added comments

Added comments to snippets that needed an explanation of their purpose, and updated some existing comments so that they match the changed code.
---
 .../assim.sequential/R/downscale_function.R   | 41 +++++++++++++++----
 1 file changed, 32 insertions(+), 9 deletions(-)

diff --git a/modules/assim.sequential/R/downscale_function.R b/modules/assim.sequential/R/downscale_function.R
index 3838d1e3e0a..6de48ba3a13 100644
--- a/modules/assim.sequential/R/downscale_function.R
+++ b/modules/assim.sequential/R/downscale_function.R
@@ -140,16 +140,21 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ
       predictions[[i]] <- stats::predict(models[[i]], test_data)
     }
   } else if (model_type == "cnn") {
-        # Define k_folds and num_bags
+    # Define k_folds and num_bags
     k_folds <- 5
     num_bags <- 5
-    
+
+    # Reshape input data for CNN
+    x_train <- keras3::array_reshape(x_train, c(nrow(x_train), 1, ncol(x_train)))
+    x_test <- keras3::array_reshape(x_test, c(nrow(x_test), 1, ncol(x_test)))
+
     for (i in seq_along(carbon_data)) {
       all_models <- list()
       
       # Create k-fold indices
       fold_indices <- caret::createFolds(y = 1:nrow(x_train), k = k_folds, list = TRUE, returnTrain = FALSE)
-      
+
+      #initialise operations for each fold
       for (fold in 1:k_folds) {
         cat(sprintf("Processing ensemble %d, fold %d of %d\n", i, fold, k_folds))
         
@@ -170,30 +175,41 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ
           x_train_bag <- x_train_fold[bootstrap_indices, ]
           y_train_bag <- y_train_fold[bootstrap_indices]
           
-          # Create and train model
+          # Define the CNN model architecture
+          # Used dual batch normalization and dropout as the first set of batch normalization and 
           model <- keras3::keras_model_sequential() |>
+            # Layer Reshape : Reshape to fit target shape for the convolutional layer
             keras3::layer_reshape(target_shape = c(ncol(x_train), 1, 1), input_shape = ncol(x_train)) |>
+            # 1D Convolutional layer: Extracts local features from input data
             keras3::layer_conv_2d(
               filters = 32,
               kernel_size = c(3, 1),
               activation = 'relu',
               padding = 'same'
             ) |>
+            # Flatten: Converts 3D output to 1D for dense layer input
             keras3::layer_flatten() |>
+            # Dense layer: Learns complex combinations of features
             keras3::layer_dense(
               units = 64, 
               activation = 'relu',
               kernel_regularizer = keras3::regularizer_l2(0.01)
             ) |>
+            # Batch normalization: Normalizes layer inputs, stabilizes learning, reduces internal covariate shift
             keras3::layer_batch_normalization() |>
+            # Dropout: Randomly sets some of inputs to 0, reducing overfitting and improving generalization
             keras3::layer_dropout(rate = 0.3) |>
+            # Dense layer: Learns complex combinations of features
             keras3::layer_dense(
               units = 32, 
               activation = 'relu',
               kernel_regularizer = keras3::regularizer_l2(0.01)
             ) |>
+            # Batch normalization: Further stabilizes learning in deeper layers
             keras3::layer_batch_normalization() |>
+            # Dropout: Additional regularization to prevent overfitting in final layer
             keras3::layer_dropout(rate = 0.3) |>
+            # Output layer: Single neuron for regression prediction
             keras3::layer_dense(
               units = 1,
               kernel_regularizer = keras3::regularizer_l2(0.01)
@@ -212,13 +228,15 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ
             patience = 10,
             restore_best_weights = TRUE
           )
-          
+
+          # Compile the model
           model |> keras3::compile(
             loss = 'mean_squared_error',
             optimizer = keras3::optimizer_adam(learning_rate = lr_schedule),
             metrics = c('mean_absolute_error')
           )
-          
+
+          # Train the model
           model |> keras3::fit(
             x = x_train_bag,
             y = y_train_bag,
@@ -227,7 +245,8 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ
             callbacks = list(early_stopping),
             verbose = 0
           )
-          
+
+          # Store the trained model for this bag in the fold_models list
           fold_models[[bag]] <- model
         }
         
@@ -251,12 +270,16 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ
         predictions <- sapply(models, function(m) stats::predict(m, newdata))
         return(rowMeans(predictions))
       }
-      
+
+      # Create a prediction raster from covariates
       prediction_rast <- terra::rast(covariates)
+
+      # Generate spatial predictions using the trained model
       maps[[i]] <- terra::predict(prediction_rast, model = models[[i]],
                                   fun = cnn_ensemble_predict,
                                   scaling_params = scaling_params)
-      
+
+      # Make predictions on held-out test data
       predictions[[i]] <- cnn_ensemble_predict(models[[i]], x_data[-sample, ], scaling_params)
       
       # Evaluate final ensemble on test set

From b73290314fb334908950cea73258088998be0192 Mon Sep 17 00:00:00 2001
From: Sambhav Dixit <94298612+sambhavnoobcoder@users.noreply.github.com>
Date: Tue, 13 Aug 2024 20:13:37 +0530
Subject: [PATCH 146/155] Removed MAE ,MSE from downscale

- MAE and MSE are already computed in the metrics function
- Reimplementing them in the downscale function is redundant
- Removed the redundant implementation from the downscale function
---
 modules/assim.sequential/R/downscale_function.R | 12 ------------
 1 file changed, 12 deletions(-)

diff --git a/modules/assim.sequential/R/downscale_function.R b/modules/assim.sequential/R/downscale_function.R
index 6de48ba3a13..ced55e56c63 100644
--- a/modules/assim.sequential/R/downscale_function.R
+++ b/modules/assim.sequential/R/downscale_function.R
@@ -252,13 +252,6 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ
         
         # Add fold models to all_models list
         all_models <- c(all_models, fold_models)
-        
-        # Evaluate fold performance
-        val_predictions <- sapply(fold_models, function(m) stats::predict(m, x_val_fold))
-        val_predictions_mean <- rowMeans(val_predictions)
-        val_mse <- mean((val_predictions_mean - y_val_fold)^2)
-        val_mae <- mean(abs(val_predictions_mean - y_val_fold))
-        cat(sprintf("Fold %d - MSE: %.4f, MAE: %.4f\n", fold, val_mse, val_mae))
       }
       
       # Store all models for this ensemble
@@ -282,11 +275,6 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ
       # Make predictions on held-out test data
       predictions[[i]] <- cnn_ensemble_predict(models[[i]], x_data[-sample, ], scaling_params)
       
-      # Evaluate final ensemble on test set
-      test_predictions <- cnn_ensemble_predict(models[[i]], x_test, scaling_params)
-      test_mse <- mean((test_predictions - y_test[, i])^2)
-      test_mae <- mean(abs(test_predictions - y_test[, i]))
-      cat(sprintf("Ensemble %d - Test MSE: %.4f, Test MAE: %.4f\n", i, test_mse, test_mae))
     }
   } else {
     stop("Invalid model_type. Please choose either 'rf' for Random Forest or 'cnn' for Convolutional Neural Network.")

From ba7b701ad03e62a1ce43a6ee9fa507eaf2a87a45 Mon Sep 17 00:00:00 2001
From: David LeBauer <dlebauer@gmail.com>
Date: Tue, 13 Aug 2024 17:54:57 -0700
Subject: [PATCH 147/155] Create CODE_OF_CONDUCT.md

Added copy of COC to root directory for visibility, per GitHub's recommended community standards https://github.com/PecanProject/pecan/community
---
 CODE_OF_CONDUCT.md | 45 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 45 insertions(+)
 create mode 100644 CODE_OF_CONDUCT.md

diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md
new file mode 100644
index 00000000000..776a2fd15e4
--- /dev/null
+++ b/CODE_OF_CONDUCT.md
@@ -0,0 +1,45 @@
+# Contributor Covenant Code of Conduct
+
+**Our Pledge**
+
+In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, nationality, personal appearance, race, religion, or sexual identity and orientation.
+
+**Our Standards**
+
+Examples of behavior that contributes to creating a positive environment include:
+
+   * Using welcoming and inclusive language
+   * Being respectful of differing viewpoints and experiences
+   * Gracefully accepting constructive criticism
+   * Focusing on what is best for the community
+   * Showing empathy towards other community members
+
+Examples of unacceptable behavior by participants include:
+
+* The use of sexualized language or imagery and unwelcome sexual attention or advances
+* Trolling, insulting/derogatory comments, and personal or political attacks
+* Public or private harassment
+* Publishing others' private information, such as a physical or electronic address, without explicit permission
+* Other conduct which could reasonably be considered inappropriate in a professional setting
+
+
+
+**Our Responsibilities**
+
+Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior.
+
+Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful.
+
+**Scope**
+
+This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers.
+
+**Enforcement**
+
+Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team at pecanproj[at]gmail.com. All complaints will be reviewed and investigated and will result in a response that is deemed necessary and appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately.
+
+Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership.
+
+**Attribution**
+
+This Code of Conduct is adapted from the [Contributor Covenant](http://contributor-covenant.org/)  version 1.4, available at [http://contributor-covenant.org/version/1/4](http://contributor-covenant.org/version/1/4/).

From becd520de541020e4c6f59a4c019851b24ed60de Mon Sep 17 00:00:00 2001
From: Abhinav Pandey <abhinav.pandey.met22@itbhu.ac.in>
Date: Wed, 14 Aug 2024 19:55:45 +0530
Subject: [PATCH 148/155] Minor Documentation Changes

Signed-off-by: Abhinav Pandey <abhinav.pandey.met22@itbhu.ac.in>
---
 DEV-INTRO.md | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/DEV-INTRO.md b/DEV-INTRO.md
index b8d17aa9eb7..465a071f9ff 100644
--- a/DEV-INTRO.md
+++ b/DEV-INTRO.md
@@ -78,7 +78,6 @@ You can copy the [`docker/env.example`](docker/env.example) file as .env in your
 cp docker/env.example .env
 ```
 
-
 The variables we want to modify are:
 
 - `COMPOSE_PROJECT_NAME`, the prefix for all containers. Set this to "pecan".
@@ -181,13 +180,13 @@ Next copy the R packages from a container to volume `pecan_lib`. This is not rea
 
 You can copy all the data using the following command. This will copy all compiled packages to your local machine.
 
-```
+```bash
 docker run -ti --rm -v pecan_R_library:/rlib pecan/base:develop cp -a /usr/local/lib/R/site-library/. /rlib/
 ```
 
 If you have set a custom UID or GID in your `.env`, change ownership of these files as described above for the data volume. E.g. if you use the same UID in the containers as on your host machine, run:
 
-```
+```bash
 docker run -ti --rm -v pecan_R_library:/rlib pecan/base:develop chown -R "$(id -u):$(id -g)" /rlib/
 ```
 
@@ -210,7 +209,7 @@ For Windows
 copy docker\web\config.docker.php web\config.php
 ```
 
-## PEcAn Development
+## PEcAn Development Setup
 
 To begin development we first have to bring up the full PEcAn stack. This assumes you have done once the steps above. You don\'t need to stop any running containers, you can use the following command to start all containers. At this point you have PEcAn running in docker.
 
@@ -239,13 +238,13 @@ R CMD ../web/workflow.R --settings docker.sipnet.xml
 
 A better way of doing this is developed as part of GSOC, in which case you can leverage of the restful interface defined, or using the new R PEcAn API package.
 
-# PEcAn URLs
+## PEcAn URLs
 
 You can check the RabbitMQ server used by pecan using <https://rabbitmq.pecan.localhost> on the same server that the docker stack is running on. You can use rstudio either with <http://server/rstudio> or at <http://rstudio.pecan.localhost>. To check the traefik dashboard you can use <http://traefik.pecan.localhost>.
 
 If the stack is running on a remote machine, you can use ssh and port forwarding to connect to the server. For example `ssh -L 8000:localhost:80` will allow you to use <http://rabbitmq.pecan.localhost:8000/> in your browser to connect to the remote PEcAn server RabbitMQ.
 
-# Directory Structure
+## Directory Structure
 
 Following are the main folders inside the pecan repository.
 
@@ -281,9 +280,9 @@ Some of the docker build files. The Dockerfiles for each model are placed in the
 
 Small scripts that are used as part of the development and installation of PEcAn.
 
-# Advanced Development Options
+## Advanced Development Options
 
-## Reset all containers/database
+### Reset all containers/database
 
 If you want to start from scratch and remove all old data, but keep your pecan checked out folder, you can remove the folders where you have written the data (see `folders` below). You will also need to remove any of the docker managed volumes. To see all volumes you can do `docker volume ls -q -f name=pecan`. If you are sure, you can either remove them one by one, or remove them all at once using the command below. **THIS DESTROYS ALL DATA IN DOCKER MANAGED VOLUMES.**.
 

From e7f95cc2a5a8e608af8b0c0c1dc233bd468b2cb4 Mon Sep 17 00:00:00 2001
From: Abhinav Pandey <abhinav.pandey.met22@itbhu.ac.in>
Date: Wed, 14 Aug 2024 20:21:30 +0530
Subject: [PATCH 149/155] Restore addSecrets tests

Signed-off-by: Abhinav Pandey <abhinav.pandey.met22@itbhu.ac.in>
---
 .../settings/tests/testthat/test.addSecrets.R | 29 +++++++++++++++++++
 1 file changed, 29 insertions(+)

diff --git a/base/settings/tests/testthat/test.addSecrets.R b/base/settings/tests/testthat/test.addSecrets.R
index e7fa1fa8729..b100f286efd 100644
--- a/base/settings/tests/testthat/test.addSecrets.R
+++ b/base/settings/tests/testthat/test.addSecrets.R
@@ -36,6 +36,35 @@ test_that("`addSecrets` adds secret settings when force is TRUE and secrets have
   expect_equal(updated_settings$database$section$password, "pecan")  
 })
 
+test_that("`addSecrets` adds secret settings when force is FALSE and secrets have not been added", {
+  settings <- list(
+    settings.info = list(
+      secrets.added = FALSE
+    ),
+    browndog = list()
+  )
+
+  mocked_xmlToList_result <- list(
+    database = list(
+      section = list(
+        name = "pecan",
+        password = "pecan"
+      )
+    ),
+    browndog = list(
+      section = list(
+        name = "pecan"
+      )
+    )
+  )
+  mockery::stub(addSecrets, 'file.exists', TRUE)
+  mockery::stub(addSecrets, 'xmlToList', mocked_xmlToList_result)
+  updated_settings <- addSecrets(settings, force = FALSE)
+  expect_equal(updated_settings$database$section$name, "pecan")
+  expect_equal(updated_settings$database$section$password, "pecan")
+  expect_equal(updated_settings$browndog$section$name, "pecan")
+})
+
 test_that("`addSecrets` adds secret settings when force is FALSE and secrets have not been added", {
   settings <- list(
     settings.info = list(

From 327b1d5efd80f91f9a5c9a8687bbd9e446bc26a5 Mon Sep 17 00:00:00 2001
From: Abhinav Pandey <abhinav.pandey.met22@itbhu.ac.in>
Date: Wed, 14 Aug 2024 20:40:17 +0530
Subject: [PATCH 150/155] Remove browndog from test

Signed-off-by: Abhinav Pandey <abhinav.pandey.met22@itbhu.ac.in>
---
 base/settings/tests/testthat/test.addSecrets.R | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/base/settings/tests/testthat/test.addSecrets.R b/base/settings/tests/testthat/test.addSecrets.R
index b100f286efd..5eb94a3eed2 100644
--- a/base/settings/tests/testthat/test.addSecrets.R
+++ b/base/settings/tests/testthat/test.addSecrets.R
@@ -1,3 +1,4 @@
+source("base/settings/R/addSecrets.R")
 test_that("`addSecrets` returns settings without updating them when `~/.pecan.xml` does not exist", {
   settings <- list()
   mockery::stub(addSecrets, 'file.exists', FALSE)
@@ -79,11 +80,6 @@ test_that("`addSecrets` adds secret settings when force is FALSE and secrets hav
         name = "pecan",
         password = "pecan"
       )
-    ),
-    browndog = list(
-      section = list(
-        name = "pecan"
-      )
     )
   )
   mockery::stub(addSecrets, 'file.exists', TRUE)
@@ -91,5 +87,4 @@ test_that("`addSecrets` adds secret settings when force is FALSE and secrets hav
   updated_settings <- addSecrets(settings, force = FALSE)
   expect_equal(updated_settings$database$section$name, "pecan")
   expect_equal(updated_settings$database$section$password, "pecan")
-  expect_equal(updated_settings$browndog$section$name, "pecan")
 })
\ No newline at end of file

From 6b7b3afaf1486cc16c3497fe2b9e57732d8bcc97 Mon Sep 17 00:00:00 2001
From: Abhinav Pandey <abhinav.pandey.met22@itbhu.ac.in>
Date: Wed, 14 Aug 2024 20:41:24 +0530
Subject: [PATCH 151/155] Revert back changes

Signed-off-by: Abhinav Pandey <abhinav.pandey.met22@itbhu.ac.in>
---
 .../settings/tests/testthat/test.addSecrets.R | 24 -------------------
 1 file changed, 24 deletions(-)

diff --git a/base/settings/tests/testthat/test.addSecrets.R b/base/settings/tests/testthat/test.addSecrets.R
index 5eb94a3eed2..e7fa1fa8729 100644
--- a/base/settings/tests/testthat/test.addSecrets.R
+++ b/base/settings/tests/testthat/test.addSecrets.R
@@ -1,4 +1,3 @@
-source("base/settings/R/addSecrets.R")
 test_that("`addSecrets` returns settings without updating them when `~/.pecan.xml` does not exist", {
   settings <- list()
   mockery::stub(addSecrets, 'file.exists', FALSE)
@@ -64,27 +63,4 @@ test_that("`addSecrets` adds secret settings when force is FALSE and secrets hav
   expect_equal(updated_settings$database$section$name, "pecan")
   expect_equal(updated_settings$database$section$password, "pecan")
   expect_equal(updated_settings$browndog$section$name, "pecan")
-})
-
-test_that("`addSecrets` adds secret settings when force is FALSE and secrets have not been added", {
-  settings <- list(
-    settings.info = list(
-      secrets.added = FALSE
-    ),
-    browndog = list()
-  )
-
-  mocked_xmlToList_result <- list(
-    database = list(
-      section = list(
-        name = "pecan",
-        password = "pecan"
-      )
-    )
-  )
-  mockery::stub(addSecrets, 'file.exists', TRUE)
-  mockery::stub(addSecrets, 'xmlToList', mocked_xmlToList_result)
-  updated_settings <- addSecrets(settings, force = FALSE)
-  expect_equal(updated_settings$database$section$name, "pecan")
-  expect_equal(updated_settings$database$section$password, "pecan")
 })
\ No newline at end of file

From bf024780296e8c05680ad19b9abaa3d61c0b4611 Mon Sep 17 00:00:00 2001
From: Sambhav Dixit <94298612+sambhavnoobcoder@users.noreply.github.com>
Date: Thu, 15 Aug 2024 14:40:41 +0530
Subject: [PATCH 152/155] Improve robustness of train-validation split in
 SDA_downscale function

- Replace 1:nrow(x_train) with seq_len(nrow(x_train)) for indexing
- Ensures proper handling of edge cases, including empty datasets
- Addresses potential "subscript out of bounds" error in CNN model training
---
 modules/assim.sequential/R/downscale_function.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/modules/assim.sequential/R/downscale_function.R b/modules/assim.sequential/R/downscale_function.R
index ced55e56c63..439de001a2b 100644
--- a/modules/assim.sequential/R/downscale_function.R
+++ b/modules/assim.sequential/R/downscale_function.R
@@ -152,14 +152,14 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ
       all_models <- list()
       
       # Create k-fold indices
-      fold_indices <- caret::createFolds(y = 1:nrow(x_train), k = k_folds, list = TRUE, returnTrain = FALSE)
+      fold_indices <- caret::createFolds(y = seq_len(nrow(x_train)), k = k_folds, list = TRUE, returnTrain = FALSE)
 
       #initialise operations for each fold
       for (fold in 1:k_folds) {
         cat(sprintf("Processing ensemble %d, fold %d of %d\n", i, fold, k_folds))
         
         # Split data into training and validation sets for this fold
-        train_indices <- setdiff(1:nrow(x_train), fold_indices[[fold]])
+        train_indices <- setdiff(seq_len(nrow(x_train)), fold_indices[[fold]])
         val_indices <- fold_indices[[fold]]
         
         x_train_fold <- x_train[train_indices, , drop = FALSE]

From 4ae1974d7aa78fa27e188d222e85ab0eba14b773 Mon Sep 17 00:00:00 2001
From: Sambhav Dixit <94298612+sambhavnoobcoder@users.noreply.github.com>
Date: Thu, 15 Aug 2024 17:10:07 +0530
Subject: [PATCH 153/155] Implement custom create_folds function to replace
 caret dependency

- Add create_folds function for k-fold cross-validation
- Removes dependency on caret::createFolds
- Supports both training and test set index generation
- Allows flexible output as list or vector
---
 .../assim.sequential/R/downscale_function.R    | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/modules/assim.sequential/R/downscale_function.R b/modules/assim.sequential/R/downscale_function.R
index 439de001a2b..f4d3700f189 100644
--- a/modules/assim.sequential/R/downscale_function.R
+++ b/modules/assim.sequential/R/downscale_function.R
@@ -62,6 +62,24 @@ SDA_downscale_preprocess <- function(data_path, coords_path, date, carbon_pool)
   return(list(input_data = input_data, site_coordinates = site_coordinates, carbon_data = carbon_data))
 }
 
+create_folds <- function(y, k, list = TRUE, returnTrain = FALSE) {
+  n <- length(y)
+  indices <- seq_len(n)
+  # Contiguous folds whose sizes differ by at most one. The previous
+  # cut(..., breaks = k) errored for k = 1 or empty y and dropped folds when
+  # n < k; the explicit factor levels make split() always return k folds
+  # (possibly empty), so callers can index folds 1..k safely.
+  fold_id <- factor(sort(rep_len(seq_len(k), n)), levels = seq_len(k))
+  folds <- split(indices, fold_id)
+  if (returnTrain) {
+    # setdiff() rather than indices[-x]: negative indexing with an empty fold
+    # would select nothing instead of every index.
+    folds <- lapply(folds, function(x) setdiff(indices, x))
+  }
+  if (!list) folds <- unlist(folds)
+  return(folds)
+}
+
 ##' @title SDA Downscale Function
 ##' @name SDA_downscale
 ##' @author Joshua Ploshay, Sambhav Dixit

From 5d86312d92fe03adeb3a77665b96db4ec2dcda45 Mon Sep 17 00:00:00 2001
From: Sambhav Dixit <94298612+sambhavnoobcoder@users.noreply.github.com>
Date: Thu, 15 Aug 2024 17:10:57 +0530
Subject: [PATCH 154/155] Add comprehensive roxygen documentation for
 create_folds function

- Include title, name, and author tags
- Detail all function parameters
- Provide clear description and details sections
- Specify return value format
- Align documentation style with project standards
---
 modules/assim.sequential/R/downscale_function.R | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/modules/assim.sequential/R/downscale_function.R b/modules/assim.sequential/R/downscale_function.R
index f4d3700f189..dca71817181 100644
--- a/modules/assim.sequential/R/downscale_function.R
+++ b/modules/assim.sequential/R/downscale_function.R
@@ -62,6 +62,20 @@ SDA_downscale_preprocess <- function(data_path, coords_path, date, carbon_pool)
   return(list(input_data = input_data, site_coordinates = site_coordinates, carbon_data = carbon_data))
 }
 
+##' @title Create folds function
+##' @name create_folds
+##' @author Sambhav Dixit
+##'
+##' @param y Vector. A vector of outcome data or indices.
+##' @param k Numeric. The number of folds to create.
+##' @param list Logical. If TRUE, returns a list of fold indices. If FALSE, returns a vector.
+##' @param returnTrain Logical. If TRUE, returns indices for training sets. If FALSE, returns indices for test sets.
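+##' @details Only the length of `y` is used: the indices `seq_along(y)` are split, in their original order, into `k` contiguous folds. No shuffling is performed, so randomise the order of `y` beforehand if it is meaningful. Either the held-out (test) indices or the complementary training indices of each fold can be returned, as a list or as a single vector.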
+##'
+##' @description This function generates k-fold indices for cross-validation, allowing for flexible output formats.
+##'
+##' @return A list of k elements (if list = TRUE), each containing indices for a fold, or a vector of indices (if list = FALSE).
+
 create_folds <- function(y, k, list = TRUE, returnTrain = FALSE) {
   n <- length(y)
   indices <- seq_len(n)

From 70bfdaf5ac40a69309d78b063c28d41576d5603d Mon Sep 17 00:00:00 2001
From: Sambhav Dixit <94298612+sambhavnoobcoder@users.noreply.github.com>
Date: Thu, 15 Aug 2024 17:16:08 +0530
Subject: [PATCH 155/155] Replace caret::createFolds with create_folds

- Replace caret::createFolds -> create_folds
- Remove caret:: dependency
---
 modules/assim.sequential/R/downscale_function.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/assim.sequential/R/downscale_function.R b/modules/assim.sequential/R/downscale_function.R
index dca71817181..54ce13d2e59 100644
--- a/modules/assim.sequential/R/downscale_function.R
+++ b/modules/assim.sequential/R/downscale_function.R
@@ -184,7 +184,7 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ
       all_models <- list()
       
       # Create k-fold indices
-      fold_indices <- caret::createFolds(y = seq_len(nrow(x_train)), k = k_folds, list = TRUE, returnTrain = FALSE)
+      fold_indices <- create_folds(y = seq_len(nrow(x_train)), k = k_folds, list = TRUE, returnTrain = FALSE)
 
       #initialise operations for each fold
       for (fold in 1:k_folds) {