diff --git a/base/qaqc/DESCRIPTION b/base/qaqc/DESCRIPTION index bd0a59fc1ad..a723730270e 100644 --- a/base/qaqc/DESCRIPTION +++ b/base/qaqc/DESCRIPTION @@ -21,6 +21,7 @@ Imports: stats Suggests: knitr, + mockery, mvbutils, PEcAn.BIOCRO, PEcAn.ED2, @@ -28,7 +29,8 @@ Suggests: PEcAn.utils, rmarkdown, testthat (>= 3.0.4), - vdiffr (>= 1.0.2) + vdiffr (>= 1.0.2), + withr X-Comment-Remotes: Installing vdiffr from GitHub because as of 2021-09-23, this is the easiest way to get version >= 1.0.2 onto Docker images that use older diff --git a/base/qaqc/R/find_formats_without_inputs.R b/base/qaqc/R/find_formats_without_inputs.R index b6df894b0ac..1107213c926 100644 --- a/base/qaqc/R/find_formats_without_inputs.R +++ b/base/qaqc/R/find_formats_without_inputs.R @@ -5,13 +5,13 @@ ##' @param con database connection object ##' @param user_id_code Optional parameter to search by user_id ##' @param created_after Optional parameter to search by creation date. Date must be in form 'YYYY-MM-DD'. -##' @param created_before Optional parameter to search by creation date. Can be used in conjunciton with created_after to specify a spesific window. Date must be in form 'YYYY-MM-DD'. -##' @param updated_after Optional parameter to search all entried updated after a certain date. Date must be in form 'YYYY-MM-DD'. -##' @param updated_before Optional parameter to search all entried updated before a certain date. Date must be in form 'YYYY-MM-DD'. +##' @param created_before Optional parameter to search by creation date. Can be used in conjunction with created_after to specify a specific window. Date must be in form 'YYYY-MM-DD'. +##' @param updated_after Optional parameter to search all entries updated after a certain date. Date must be in form 'YYYY-MM-DD'. +##' @param updated_before Optional parameter to search all entries updated before a certain date. Date must be in form 'YYYY-MM-DD'. ##' @param con connection the the bety database ##' ##' -##' @description This is a fucntion that returns a dataframe with all of the format entries that have no assosiated input records. +##' @description This is a function that returns a dataframe with all of the format entries that have no associated input records. ##' ##' For more information on how to use this function see the "Pre-release-database-cleanup" script in the 'vignettes' folder ##' or look at the README diff --git a/base/qaqc/R/find_inputs_without_formats.R b/base/qaqc/R/find_inputs_without_formats.R index ba5ff91a2ba..0c21fd71b2d 100644 --- a/base/qaqc/R/find_inputs_without_formats.R +++ b/base/qaqc/R/find_inputs_without_formats.R @@ -7,7 +7,7 @@ ##' @param con connection the the bety database ##' ##' -##' @description This is a function that returns a dataframe with all of the input entries that have no assosiated format records. +##' @description This is a function that returns a dataframe with all of the input entries that have no associated format records. ##' This is very rare in the database. ##' ##' For more information on how to use this function see the "Pre-release-database-cleanup" script in the 'vignettes' folder diff --git a/base/qaqc/R/get_table_column_names.R b/base/qaqc/R/get_table_column_names.R index 8a5a8bfe5ac..9c90bc81852 100644 --- a/base/qaqc/R/get_table_column_names.R +++ b/base/qaqc/R/get_table_column_names.R @@ -1,38 +1,35 @@ ##' get_table_column_names ##' @author Tempest McCabe -##' -##' @param table a table that is output from one of the find_* functions, -##' or a data.frame containing the output from multiple find_* functions. Could also be a vector of table names. -##' @param con a connection to the bety database. -##' -##' -##' @description This function will return a vector of the column names for a given table(s) in the bety database. -##' Useful for choseing which columns to include in the written-out table. +##' +##' @param table a table that is output from one of the find_* functions, +##' or a data.frame containing the output from multiple find_* functions. Could also be a vector of table names. +##' @param con a connection to the bety database. +##' +##' +##' @description This function will return a vector of the column names for a given table(s) in the bety database. +##' Useful for choosing which columns to include in the written-out table. ##' ##' For more information on how to use this function see the "Pre-release-database-cleanup" script in the 'vignettes' folder ##' or look at the README ##' @export -get_table_column_names<-function(table, con){ - - if(is.data.frame(table)){ - if("table_name" %in% names(table)){ - table_factor<-as.factor(table$table_name) - table_name<-levels(table_factor) - }else{ +get_table_column_names <- function(table, con) { + if (is.data.frame(table)) { + if ("table_name" %in% names(table)) { + table_factor <- as.factor(table$table_name) + table_name <- levels(table_factor) + } else { PEcAn.logger::logger.severe("Table needs either a 'table_names' column or be a character vector of table names") } - - }else if(is.vector(table)){ - table_name<-table - }else{ + } else if (is.vector(table)) { + table_name <- table + } else { PEcAn.logger::logger.severe("table must either be a dataframe or a vector") } - column_names<-list() - for(i in seq_along(table_name)){ - query <- PEcAn.DB::db.query(paste("SELECT * from", table_name, "LIMIT 1"), con=con) - column_names[[i]]<-colnames(query) - names(column_names)<-table_name - + column_names <- list() + for (i in seq_along(table_name)) { + query <- PEcAn.DB::db.query(paste("SELECT * from", table_name, "LIMIT 1"), con = con) + column_names[[i]] <- colnames(query) + names(column_names) <- table_name } return(column_names) } diff --git a/base/qaqc/R/taylor.plot.R b/base/qaqc/R/taylor.plot.R index 00986770518..6892f85e5bc 100644 --- a/base/qaqc/R/taylor.plot.R +++ b/base/qaqc/R/taylor.plot.R @@ -1,7 +1,7 @@ #------------------------------------------------------------------------------- # Copyright (c) 2012 University of Illinois, NCSA. # All rights reserved. This program and the accompanying materials -# are made available under the terms of the +# are made available under the terms of the # University of Illinois/NCSA Open Source License # which accompanies this distribution, and is available at # http://opensource.ncsa.illinois.edu/license.html @@ -20,7 +20,7 @@ new.taylor <- function(dataset, runid, siteid) { mod <- dataset[sitemask, paste0("model", run)] R <- stats::cor(obs, mod, use = "pairwise") sd.f <- stats::sd(mod) - lab <- paste(paste0("model", run), paste0("site", si)) + lab <- paste(paste0("model", run), paste0("site", si)) if (run == runid[1] && si == siteid[1]) { plotrix::taylor.diagram(obs, mod, pos.cor = FALSE) } else { diff --git a/base/qaqc/R/write_out_table.R b/base/qaqc/R/write_out_table.R index 75ec8879ff7..dd9de9e5cee 100644 --- a/base/qaqc/R/write_out_table.R +++ b/base/qaqc/R/write_out_table.R @@ -1,30 +1,29 @@ ##' write_out_table ##' @author Tempest McCabe -##' -##' @param table a table that is output from one of the find_* fucntions +##' +##' @param table a table that is output from one of the find_* functions ##' @param table_name name of table ##' @param outdir path to folder into which the editable table will be written -##' @param relevant_table_columns a list of all columns to keep. ID and table name will be automatically included. -##' -##' -##' @description This is a fucntion that returns a dataframe with all of the format entries that have no assosiated input records. +##' @param relevant_table_columns a list of all columns to keep. ID and table name will be automatically included. +##' +##' +##' @description This is a function that returns a dataframe with all of the format entries that have no associated input records. ##' ##' For more information on how to use this function see the "Pre-release-database-cleanup" script in the 'vignettes' folder ##' or look at the README ##' @export -write_out_table<-function(table,table_name,outdir, relevant_table_columns){ - - if(!"id" %in% relevant_table_columns){ - relevant_table_columns<-c(relevant_table_columns, "id") +write_out_table <- function(table, table_name, outdir, relevant_table_columns) { + if (!"id" %in% relevant_table_columns) { + relevant_table_columns <- c(relevant_table_columns, "id") } - if(!"table_name" %in% relevant_table_columns){ - relevant_table_columns<-c(relevant_table_columns, "id", "table_name") + if (!"table_name" %in% relevant_table_columns) { + relevant_table_columns <- c(relevant_table_columns, "table_name") } - if(!any(c("id", "table_name") %in% names(table))){ + if (!any(c("id", "table_name") %in% names(table))) { PEcAn.logger::logger.severe("table provided doesn't have a table_name or id column or both. ") } - - - table<-table[ , (relevant_table_columns)] - utils::write.table(table, file=paste(outdir,"/query_of_",table_name ,sep=""),row.names = FALSE,sep="|") -} \ No newline at end of file + + + table <- table[, (relevant_table_columns)] + utils::write.table(table, file = paste(outdir, "/query_of_", table_name, sep = ""), row.names = FALSE, sep = "|") +} diff --git a/base/qaqc/man/find_formats_without_inputs.Rd b/base/qaqc/man/find_formats_without_inputs.Rd index c20d769cd1c..7cc57d31d6a 100644 --- a/base/qaqc/man/find_formats_without_inputs.Rd +++ b/base/qaqc/man/find_formats_without_inputs.Rd @@ -20,14 +20,14 @@ find_formats_without_inputs( \item{created_after}{Optional parameter to search by creation date. Date must be in form 'YYYY-MM-DD'.} -\item{updated_after}{Optional parameter to search all entried updated after a certain date. Date must be in form 'YYYY-MM-DD'.} +\item{updated_after}{Optional parameter to search all entries updated after a certain date. Date must be in form 'YYYY-MM-DD'.} -\item{created_before}{Optional parameter to search by creation date. Can be used in conjunciton with created_after to specify a spesific window. Date must be in form 'YYYY-MM-DD'.} +\item{created_before}{Optional parameter to search by creation date. Can be used in conjunction with created_after to specify a specific window. Date must be in form 'YYYY-MM-DD'.} -\item{updated_before}{Optional parameter to search all entried updated before a certain date. Date must be in form 'YYYY-MM-DD'.} +\item{updated_before}{Optional parameter to search all entries updated before a certain date. Date must be in form 'YYYY-MM-DD'.} } \description{ -This is a fucntion that returns a dataframe with all of the format entries that have no assosiated input records. +This is a function that returns a dataframe with all of the format entries that have no associated input records. For more information on how to use this function see the "Pre-release-database-cleanup" script in the 'vignettes' folder or look at the README diff --git a/base/qaqc/man/find_inputs_without_formats.Rd b/base/qaqc/man/find_inputs_without_formats.Rd index 1ceb2cc315d..ae19ac4a686 100644 --- a/base/qaqc/man/find_inputs_without_formats.Rd +++ b/base/qaqc/man/find_inputs_without_formats.Rd @@ -23,7 +23,7 @@ find_inputs_without_formats( \item{updated_before, updated_after}{Optional parameter to search all entried updated after a certain date. Date must be in form 'YYYY-MM-DD'} } \description{ -This is a function that returns a dataframe with all of the input entries that have no assosiated format records. +This is a function that returns a dataframe with all of the input entries that have no associated format records. This is very rare in the database. For more information on how to use this function see the "Pre-release-database-cleanup" script in the 'vignettes' folder diff --git a/base/qaqc/man/get_table_column_names.Rd b/base/qaqc/man/get_table_column_names.Rd index 44f31c778a5..3757ec959fd 100644 --- a/base/qaqc/man/get_table_column_names.Rd +++ b/base/qaqc/man/get_table_column_names.Rd @@ -7,14 +7,14 @@ get_table_column_names(table, con) } \arguments{ -\item{table}{a table that is output from one of the find_* functions, +\item{table}{a table that is output from one of the find_* functions, or a data.frame containing the output from multiple find_* functions. Could also be a vector of table names.} \item{con}{a connection to the bety database.} } \description{ -This function will return a vector of the column names for a given table(s) in the bety database. -Useful for choseing which columns to include in the written-out table. +This function will return a vector of the column names for a given table(s) in the bety database. +Useful for choosing which columns to include in the written-out table. For more information on how to use this function see the "Pre-release-database-cleanup" script in the 'vignettes' folder or look at the README diff --git a/base/qaqc/man/write_out_table.Rd b/base/qaqc/man/write_out_table.Rd index 2eaf9dc57d5..2140fae26eb 100644 --- a/base/qaqc/man/write_out_table.Rd +++ b/base/qaqc/man/write_out_table.Rd @@ -7,7 +7,7 @@ write_out_table(table, table_name, outdir, relevant_table_columns) } \arguments{ -\item{table}{a table that is output from one of the find_* fucntions} +\item{table}{a table that is output from one of the find_* functions} \item{table_name}{name of table} @@ -16,7 +16,7 @@ write_out_table(table, table_name, outdir, relevant_table_columns) \item{relevant_table_columns}{a list of all columns to keep. ID and table name will be automatically included.} } \description{ -This is a fucntion that returns a dataframe with all of the format entries that have no assosiated input records. +This is a function that returns a dataframe with all of the format entries that have no associated input records. For more information on how to use this function see the "Pre-release-database-cleanup" script in the 'vignettes' folder or look at the README diff --git a/base/qaqc/tests/testthat/test.cull_database_entries.R b/base/qaqc/tests/testthat/test.cull_database_entries.R new file mode 100644 index 00000000000..0b81d294fa6 --- /dev/null +++ b/base/qaqc/tests/testthat/test.cull_database_entries.R @@ -0,0 +1,25 @@ +test_that("`cull_database_entries()` gives errors for faulty inputs",{ + expect_error( + cull_database_entries(outdir = 'test'), + "If a table object hasn't been provided, a file_name must be set." + ) + expect_error( + cull_database_entries(table = 'test_table', file_name = 'test_file', outdir = 'test'), + "table and file_name cannot both be provided." + ) + expect_error( + cull_database_entries(table = 'test_table', outdir = 'test'), + "Please provide a table_name" + ) +}) + +test_that("`cull_database_entries()` able to correctly add logs to the output file", { + withr::with_dir(tempdir(), { + mockery::stub(cull_database_entries, 'PEcAn.DB::db.query', 'test_log') + dir <- getwd() + cull_database_entries(table = data.frame(id = 1), table_name = 'test', con = 1, outdir = dir) + expect_true(file.exists(paste0(dir, "/deletion_log_of_test"))) + file_data <- readLines(paste0(dir, "/deletion_log_of_test")) + expect_equal(grepl("test_log", file_data), c(TRUE, TRUE)) + }) +}) \ No newline at end of file diff --git a/base/qaqc/tests/testthat/test.find_formats_without_inputs.R b/base/qaqc/tests/testthat/test.find_formats_without_inputs.R new file mode 100644 index 00000000000..f61af2368b5 --- /dev/null +++ b/base/qaqc/tests/testthat/test.find_formats_without_inputs.R @@ -0,0 +1,14 @@ +test_that("`find_formats_without_inputs()` able to find formats with no input record",{ + format_command_mock <- data.frame(user_id = '2020', created_at = '2001-01-01', updated_at = '2010-01-01') + input_command_mock <- data.frame(format_id = '2000', user_id = '2021', created_at = '2002-01-02', updated_at = '2012-01-02') + mocked_res <- mockery::mock(input_command_mock, format_command_mock) + mockery::stub(find_formats_without_inputs, 'dplyr::tbl', mocked_res) + + res <- find_formats_without_inputs( + con = NULL, user_id_code = '2020', created_after = '2000-01-01', updated_after = '2009-01-01', created_before = '2002-01-01', updated_before = '2011-01-01' + ) + expect_equal( + res, + data.frame(id = '2020', created_at = '2001-01-01', updated_at = '2010-01-01', table_name = "formats") + ) +}) \ No newline at end of file diff --git a/base/qaqc/tests/testthat/test.find_inputs_without_formats.R b/base/qaqc/tests/testthat/test.find_inputs_without_formats.R new file mode 100644 index 00000000000..75c4b86d36b --- /dev/null +++ b/base/qaqc/tests/testthat/test.find_inputs_without_formats.R @@ -0,0 +1,13 @@ +test_that("`find_inputs_without_formats()` able to find inputs with no format records", { + input_command_mock <- data.frame(format_id = '2020', user_id = '2020', created_at = '2001-01-01', updated_at = '2010-01-01') + format_command_mock <- data.frame(user_id = '2021', created_at = '2002-01-02', updated_at = '2012-01-02') + mocked_res <- mockery::mock(input_command_mock, format_command_mock) + mockery::stub(find_inputs_without_formats, 'dplyr::tbl', mocked_res) + res <- find_inputs_without_formats( + con = NULL, user_id = '2020', created_after = '2000-01-01', updated_after = '2009-01-01', created_before = '2002-01-01', updated_before = '2011-01-01' + ) + expect_equal( + res, + data.frame(id = '2020', user_id = '2020',created_at = '2001-01-01', updated_at = '2010-01-01', table_name = "inputs") + ) +}) \ No newline at end of file diff --git a/base/qaqc/tests/testthat/test.get_table_column_names.R b/base/qaqc/tests/testthat/test.get_table_column_names.R new file mode 100644 index 00000000000..8a313365692 --- /dev/null +++ b/base/qaqc/tests/testthat/test.get_table_column_names.R @@ -0,0 +1,8 @@ +test_that("`get_table_column_names()` able to return the column names of a table as a list",{ + mocked_res <- mockery::mock(data.frame(head1 = 1, head2 = 2)) + mockery::stub(get_table_column_names, 'PEcAn.DB::db.query', mocked_res) + res <- get_table_column_names(table = data.frame(table_name = 'test_table'), con = 1) + args <- mockery::mock_args(mocked_res) + expect_equal(args[[1]][[1]], "SELECT * from test_table LIMIT 1") + expect_equal(res, list(test_table = c("head1", "head2"))) +}) \ No newline at end of file diff --git a/base/qaqc/tests/testthat/test.write_out_table.R b/base/qaqc/tests/testthat/test.write_out_table.R new file mode 100644 index 00000000000..fb4cedd6ed5 --- /dev/null +++ b/base/qaqc/tests/testthat/test.write_out_table.R @@ -0,0 +1,11 @@ +test_that("`write_out_table()` able to create and update output file with relevant data",{ + withr::with_dir(tempdir(), { + dir <- getwd() + write_out_table( + table = data.frame(id = 1, table_name = 'test'), table_name = 'test', relevant_table_columns = c(), outdir = dir + ) + expect_true(file.exists(paste0(dir, "/query_of_test"))) + file_data <- readLines(paste0(dir, "/query_of_test")) + expect_equal(grepl("test", file_data), c(FALSE, TRUE)) + }) +}) \ No newline at end of file