Skip to content

Commit

Permalink
Merge pull request #95 from Spatiotemporal-Exposures-and-Toxicology/c…
Browse files Browse the repository at this point in the history
…leaning_pipeline

Cleaning pipeline
  • Loading branch information
eva0marques authored Dec 15, 2023
2 parents f2a2f7d + e391a8a commit 5dfdf1e
Show file tree
Hide file tree
Showing 10 changed files with 382 additions and 1,318 deletions.
6 changes: 6 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
export(add_build_fp)
export(add_build_h)
export(add_canopy_h)
export(add_cov)
export(add_dem)
export(add_era5_rast)
export(add_era5_vect)
Expand All @@ -11,6 +12,7 @@ export(add_imp)
export(add_nlcd_ratio)
export(add_tcc)
export(add_terrain)
export(build_h_table)
export(check_obs)
export(check_pred_grid)
export(compute_rmse_cv)
Expand All @@ -27,11 +29,15 @@ export(create_pred_rds)
export(create_sp_fold)
export(create_starrayobj)
export(create_stdtobj)
export(format_econet_aws)
export(format_noaa_aws)
export(is_starrayobj)
export(is_stdtobj)
export(list_covar_nc)
export(list_monitors_nc)
export(map_res_cv)
export(map_rmse_cv)
export(nlcd_table)
export(plot_reg)
export(plot_reg_cv)
export(plot_res)
Expand Down
75 changes: 75 additions & 0 deletions R/process_monitors.R
Original file line number Diff line number Diff line change
Expand Up @@ -75,3 +75,78 @@ format_noaa_aws <- function(filename) {
locs <- locs[, na_flag := (nb_na_tn > 5 | nb_na_tx > 5), ]
return(list(obs = obs, locs = locs))
}

#' Format econet observations
#'
#' @param path character path to econet observations file
#' @returns a list with two datatable, one for the whole observations and
#' the other with the location metadata only
#' @export
format_econet_aws <- function(path) {
na_flag <- NULL
files <- list.files(path = path, pattern = "M*.xlsx")
obs <- do.call("rbind", lapply(files, FUN = function(file) {
openxlsx::read.xlsx(paste0(path, file))
}))
colnames(obs) <- c("time", "tx", "tn", "id", "network", "name",
"county_default", "lat", "lon", "elev", "support")
obs[obs == "QCF"] <- NA
obs$tx <- as.numeric(obs$tx)
obs$tn <- as.numeric(obs$tn)
obs$time <- as.Date(obs$time, format = "%Y-%m-%d")
obs <- data.table::data.table(obs)
obs$tn <- (obs$tn - 32) * 5 / 9
obs$tx <- (obs$tx - 32) * 5 / 9
# flag stations with >5% missing data in tn or tx
obs_ts_tn <- maditr::dcast(
obs[, c("time", "id", "tn")],
time ~ id,
value.var = "tn"
) %>%
xts::as.xts()
nb_na_tn <- lapply(obs_ts_tn, FUN = function(x) sum(is.na(x)))
nb_na_tn <- as.data.frame(do.call(rbind, nb_na_tn))
nb_na_tn <- cbind(id = rownames(nb_na_tn), nb_na_tn)
rownames(nb_na_tn) <- seq_len(nrow(nb_na_tn))
names(nb_na_tn)[names(nb_na_tn) == "V1"] <- "nb_na_tn"
obs_ts_tx <- maditr::dcast(
obs[, c("time", "id", "tx")],
time ~ id,
value.var = "tx"
) %>%
xts::as.xts()
nb_na_tx <- lapply(obs_ts_tx, FUN = function(x) sum(is.na(x)))
nb_na_tx <- as.data.frame(do.call(rbind, nb_na_tx))
nb_na_tx <- cbind(id = rownames(nb_na_tx), nb_na_tx)
rownames(nb_na_tx) <- seq_len(nrow(nb_na_tx))
names(nb_na_tx)[names(nb_na_tx) == "V1"] <- "nb_na_tx"
locs <- unique(obs[, c("id", "name", "network", "lat", "lon", "elev")])
locs <- list(locs, nb_na_tx, nb_na_tn) %>%
purrr::reduce(dplyr::full_join, by = "id") %>%
data.table::data.table()
obs <- list(obs, nb_na_tx, nb_na_tn) %>%
purrr::reduce(dplyr::full_join, by = "id") %>%
data.table::data.table()
obs <- obs[, na_flag := (nb_na_tn > 5 | nb_na_tx > 5), ]
locs <- locs[, na_flag := (nb_na_tn > 5 | nb_na_tx > 5), ]
return(list(obs = obs, locs = locs))
}

#' Add all covariates to observation SpatVector
#'
#' @param obs_spvect a SpatVector of AWS observations
#' @param covar_files list of character path to covariates files
#' @returns a SpatVector with all covariates added
#' @export
add_cov <- function(obs_spvect, covar_files) {
output <- add_imp(covar_files$imp, obs_spvect) %>%
add_tcc(tcc_path = covar_files$tcc) %>%
add_terrain(dem_path = covar_files$dem) %>%
add_canopy_h(canopy_h_path = covar_files$canopy_h) %>%
add_build_fp(build_fp_path = covar_files$build_fp) %>%
add_build_h(build_h_path = covar_files$build_h) %>%
add_nlcd_ratio(nlcd_path = covar_files$nlcd) %>%
add_county(county_path = covar_files$county) %>%
add_era5_vect(era5_path = covar_files$era5)
return(output)
}
35 changes: 35 additions & 0 deletions R/upload_metadata.R
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
#' Give a list of covariate files names
#'
#' @param covar_folder character path to covariates files
#' @returns a list of covariate file paths
#' @export
list_covar_nc <- function(covar_folder) {
covar_files <- list(
imp = paste0(covar_folder, "NC_imperviousness_2019.tif"),
Expand All @@ -24,6 +29,32 @@ list_covar_nc <- function(covar_folder) {
return(covar_files)
}


#' Give a list of monitor files names
#'
#' @param folder character path to monitors folder
#' @returns a list of monitors file paths
#' @export
list_monitors_nc <- function(folder) {
files <- list(
noaa = paste0(folder,
"NC-AWS-NOAA-dailysummary-20220601-20220831.csv"),
econet = paste0(folder,
"ECONET-stations/",
"NC-econet-dailysummary-20220601-20220831.csv"),
if (!all(sapply(files, file.exists))) {
warning("Some of the files do not exist.")
}
)
return(files)
}



#' Building height metadata
#'
#' @returns a data.table with all building height metadata
#' @export
build_h_table <- function() {
table <- data.table::as.data.table(
list(
Expand All @@ -44,6 +75,10 @@ build_h_table <- function() {
return(table)
}

#' National Land Cover Dataset metadata (NLCD)
#'
#' @returns a data.table with all national land cover dataset metadata
#' @export
nlcd_table <- function() {
table <- data.table::as.data.table(
list(value = c(0, 11, 21, 22, 23, 24, 31, 41, 42, 43, 52,
Expand Down
Loading

0 comments on commit 5dfdf1e

Please sign in to comment.