From 5c588f868449b190b774631ae404fe27ecf96c45 Mon Sep 17 00:00:00 2001 From: Danielle Callan Date: Tue, 30 Apr 2024 12:57:53 -0400 Subject: [PATCH 1/6] draft correlations for scatter plots --- R/class-plotdata-scatter.R | 32 +++++++++++++++++++++++++++++++- R/group.R | 32 ++++++++++++++++++++++++++++++++ R/utils-stats.R | 21 +++++++++++++++++++++ 3 files changed, 84 insertions(+), 1 deletion(-) diff --git a/R/class-plotdata-scatter.R b/R/class-plotdata-scatter.R index 09478e0..600dbea 100644 --- a/R/class-plotdata-scatter.R +++ b/R/class-plotdata-scatter.R @@ -3,6 +3,7 @@ newScatterPD <- function(.dt = data.table::data.table(), value = character(), useGradientColorscale = FALSE, overlayValues = veupathUtils::BinList(), + correlationMethod = character(), sampleSizes = logical(), completeCases = logical(), evilMode = character(), @@ -30,18 +31,31 @@ newScatterPD <- function(.dt = data.table::data.table(), group <- veupathUtils::findColNamesFromPlotRef(variables, 'overlay') panel <- findPanelColName(veupathUtils::findVariableSpecFromPlotRef(variables, 'facet1'), veupathUtils::findVariableSpecFromPlotRef(variables, 'facet2')) + + dtForCorr <- data.table::copy(.pd) + attributes(dtForCorr)$class <- c('data.table', 'data.frame') if (useGradientColorscale) { + #series data w gradient .pd$overlayMissingData <- is.na(.pd[[group]]) series <- collapseByGroup(.pd, group = 'overlayMissingData', panel) .pd$overlayMissingData <- NULL series$overlayMissingData <- NULL data.table::setnames(series, c(panel, 'seriesX', 'seriesY', 'seriesGradientColorscale')) + + # corr results w gradient + dtForCorr[[group]] <- NULL + corrResult <- groupCorrelation(dtForCorr, x, y, NULL, panel, correlationMethod = correlationMethod) } else { + #series data w/o gradient series <- collapseByGroup(.pd, group, panel) data.table::setnames(series, c(group, panel, 'seriesX', 'seriesY')) + + # corr results w/o gradient + corrResult <- groupCorrelation(dtForCorr, x, y, group, panel, correlationMethod = correlationMethod) } - + veupathUtils::logWithTime('Calculated correlation results per group.', verbose) + if (xType == 'DATE') { series$seriesX <- lapply(series$seriesX, format, '%Y-%m-%d') } else { @@ -103,6 +117,14 @@ newScatterPD <- function(.dt = data.table::data.table(), } else { .pd <- series } + + if (!is.null(key(.pd))) { + .pd <- merge(.pd, corrResult) + } else { + .pd <- cbind(.pd, corrResult) + } + + attr$correlationMethod <- correlationMethod attr$names <- names(.pd) if (useGradientColorscale) attr$useGradientColorscale <- useGradientColorscale @@ -160,6 +182,7 @@ validateScatterPD <- function(.scatter, verbose) { #' to include raw data with smoothed mean. Note only 'raw' is compatible with a continuous #' overlay variable. #' @param overlayValues veupathUtils::BinList providing overlay values of interest +#' @param correlationMethod character indicating which correlation method to use. One of 'pearson', 'spearman' or 'sparcc'. #' @param sampleSizes boolean indicating if sample sizes should be computed #' @param completeCases boolean indicating if complete cases should be computed #' @param evilMode String indicating how evil this plot is ('strataVariables', 'allVariables', 'noVariables') @@ -207,6 +230,7 @@ scattergl.dt <- function(data, 'density', 'raw'), overlayValues = NULL, + correlationMethod = c('pearson', 'sparcc', 'spearman'), sampleSizes = c(TRUE, FALSE), completeCases = c(TRUE, FALSE), evilMode = c('noVariables', 'allVariables', 'strataVariables'), @@ -216,6 +240,7 @@ scattergl.dt <- function(data, if (!inherits(variables, 'VariableMetadataList')) stop("The `variables` argument must be a VariableMetadataList object.") value <- veupathUtils::matchArg(value) + correlationMethod <- veupathUtils::matchArg(correlationMethod) sampleSizes <- veupathUtils::matchArg(sampleSizes) completeCases <- veupathUtils::matchArg(completeCases) evilMode <- veupathUtils::matchArg(evilMode) @@ -269,6 +294,7 @@ scattergl.dt <- function(data, value = value, useGradientColorscale = useGradientColorscale, overlayValues = overlayValues, + correlationMethod = correlationMethod, sampleSizes = sampleSizes, completeCases = completeCases, inferredVarAxis = 'y', @@ -277,6 +303,7 @@ scattergl.dt <- function(data, .scatter <- validateScatterPD(.scatter, verbose) veupathUtils::logWithTime(paste('New scatter plot object created with parameters value =', value, + ', correlationMethod =', correlationMethod, ', sampleSizes = ', sampleSizes, ', completeCases = ', completeCases, ', evilMode =', evilMode, @@ -318,6 +345,7 @@ scattergl.dt <- function(data, #' 'density' estimates (no raw data returned), alternatively 'smoothedMeanWithRaw' to include raw #' data with smoothed mean. Note only 'raw' is compatible with a continuous overlay variable. #' @param overlayValues veupathUtils::BinList providing overlay values of interest +#' @param correlationMethod character indicating which correlation method to use. One of 'pearson', 'spearman' or 'sparcc'. #' @param sampleSizes boolean indicating if sample sizes should be computed #' @param completeCases boolean indicating if complete cases should be computed #' @param evilMode String indicating how evil this plot is ('strataVariables', 'allVariables', 'noVariables') @@ -365,6 +393,7 @@ scattergl <- function(data, 'density', 'raw'), overlayValues = NULL, + correlationMethod = c('pearson', 'sparcc', 'spearman'), sampleSizes = c(TRUE, FALSE), completeCases = c(TRUE, FALSE), evilMode = c('noVariables', 'allVariables', 'strataVariables'), @@ -376,6 +405,7 @@ scattergl <- function(data, variables, value = value, overlayValues = overlayValues, + correlationMethod = correlationMethod, sampleSizes = sampleSizes, completeCases = completeCases, evilMode = evilMode, diff --git a/R/group.R b/R/group.R index 22e98e7..3a607fd 100644 --- a/R/group.R +++ b/R/group.R @@ -168,6 +168,38 @@ groupDensity <- function(data, x = NULL, y, group = NULL, panel = NULL, geo = NU return(dt) } +## NOTE: if this is done right, we shouldnt have to collapse the result ever +## it should be that each group gets a single value for each of dataX, dataY, correlationCoef, pValue +## the only way that wouldnt be true was if we had more than two cols in .SD, which shouldnt be the case +groupCorrelation <- function( + data, + x, + y, + group = NULL, + panel = NULL, + geo = NULL, + correlationMethod = c('pearson', 'spearman', 'sparcc') +) { + veupathUtils::matchArg(correlationMethod) + + byCols <- colnames(data)[colnames(data) %in% c(group, geo, panel)] + if (all(is.null(c(group,geo,panel)))) { + dt <- data[, {corrResult <- correlationOrEmpty(.SD, method = correlationMethod); + list(correlationCoef = corrResult[[3]], + pValue = corrResult[[4]])}] + } else { + dt <- data[, {corrResult <- correlationOrEmpty(.SD, method = correlationMethod); + list(correlationCoef = corrResult[[3]], + pValue = corrResult[[4]])}, + keyby=eval(byCols)] + } + + indexCols <- c(panel, geo, group) + setkeyv(dt, indexCols) + + return(dt) +} + #' @importFrom purrr reduce groupSmoothedMean <- function(data, x, y, group = NULL, panel = NULL, geo = NULL, collapse = TRUE) { diff --git a/R/utils-stats.R b/R/utils-stats.R index 254c756..25fa618 100644 --- a/R/utils-stats.R +++ b/R/utils-stats.R @@ -1,3 +1,24 @@ +## this wraps veupathUtils::correlation, except wont err for an empty dt +## it also returns the dt of correlation coeficients and p-values rather +## than the ComputeResult object +correlationOrEmpty <- function(dt, method = c('pearson', 'spearman', 'sparcc')) { + method <- veupathUtils::matchArg(method) + + emptyResult <- data.table::data.table(dataX = NA_character_, dataY = NA_character_, correlationCoef = NA_real_, pValue = NA_real_) + if (is.null(dt)) return(emptyResult) + if (nrow(dt) == 0) return(emptyResult) + + result <- try(suppressWarnings(veupathUtils::correlation(dt, NULL, method = method, format = 'data.table', verbose = FALSE)), silent = TRUE) + + if (any(veupathUtils::is.error(result)) || nrow(result) == 0) { + emptyResult$dataX <- names(dt)[1] + emptyResult$dataY <- names(dt)[2] + return(emptyResult) + } else { + return(result) + } +} + formatPValue <- function(pvalue) { if (pvalue < 0.0001) return("<0.0001") return(as.character(signif(pvalue, 2))) From 5e23942b928d7b7799a824d4c1f7e5d1587c73e3 Mon Sep 17 00:00:00 2001 From: Danielle Callan Date: Tue, 30 Apr 2024 12:58:06 -0400 Subject: [PATCH 2/6] update scatter tests to include correlation coefs --- tests/testthat/test-scattergl.R | 108 ++++++++++++++++---------------- 1 file changed, 54 insertions(+), 54 deletions(-) diff --git a/tests/testthat/test-scattergl.R b/tests/testthat/test-scattergl.R index 58a342f..ba9ae06 100644 --- a/tests/testthat/test-scattergl.R +++ b/tests/testthat/test-scattergl.R @@ -114,7 +114,7 @@ test_that("scattergl.dt() returns a valid plot.data scatter object", { expect_is(dt, 'plot.data') expect_is(dt, 'scatterplot') namedAttrList <- getPDAttributes(dt) - expect_equal(names(namedAttrList),c('variables', 'completeCasesAllVars','completeCasesAxesVars','completeCasesTable','sampleSizeTable')) + expect_equal(names(namedAttrList),c('variables', 'completeCasesAllVars','completeCasesAxesVars','completeCasesTable','sampleSizeTable','correlationMethod')) completeCases <- completeCasesTable(dt) expect_equal(names(completeCases), c('variableDetails','completeCases')) expect_equal(nrow(completeCases), 4) @@ -127,7 +127,7 @@ test_that("scattergl.dt() returns a valid plot.data scatter object", { expect_is(dt, 'plot.data') expect_is(dt, 'scatterplot') namedAttrList <- getPDAttributes(dt) - expect_equal(names(namedAttrList),c('variables')) + expect_equal(names(namedAttrList),c('variables','correlationMethod')) }) @@ -264,23 +264,23 @@ test_that("scattergl.dt() returns an appropriately sized data.table", { dt <- scattergl.dt(df, variables, 'raw') expect_is(dt, 'data.table') expect_equal(nrow(dt),12) - expect_equal(names(dt),c('entity.cat3', 'entity.cat4', 'seriesX', 'seriesY')) + expect_equal(names(dt),c('entity.cat4', 'entity.cat3', 'seriesX', 'seriesY', 'correlationCoef', 'pValue')) expect_equal(as.character(range(df$entity.dateA)), range(dt$seriesX)) dt <- scattergl.dt(df, variables, 'smoothedMean') expect_is(dt, 'data.table') expect_equal(nrow(dt),12) - expect_equal(names(dt),c('entity.cat3', 'entity.cat4', 'smoothedMeanX', 'smoothedMeanY', 'smoothedMeanSE', 'smoothedMeanError')) + expect_equal(names(dt),c('entity.cat4', 'entity.cat3', 'smoothedMeanX', 'smoothedMeanY', 'smoothedMeanSE', 'smoothedMeanError', 'correlationCoef', 'pValue')) dt <- scattergl.dt(df, variables, 'smoothedMeanWithRaw') expect_is(dt, 'data.table') expect_equal(nrow(dt),12) - expect_equal(names(dt),c('entity.cat4', 'entity.cat3', 'seriesX', 'seriesY', 'smoothedMeanX', 'smoothedMeanY', 'smoothedMeanSE', 'smoothedMeanError')) + expect_equal(names(dt),c('entity.cat4', 'entity.cat3', 'seriesX', 'seriesY', 'smoothedMeanX', 'smoothedMeanY', 'smoothedMeanSE', 'smoothedMeanError', 'correlationCoef', 'pValue')) dt <- scattergl.dt(df, variables, 'bestFitLineWithRaw') expect_is(dt, 'data.table') expect_equal(nrow(dt),12) - expect_equal(names(dt),c('entity.cat4', 'entity.cat3', 'seriesX', 'seriesY', 'bestFitLineX', 'bestFitLineY', 'r2')) + expect_equal(names(dt),c('entity.cat4', 'entity.cat3', 'seriesX', 'seriesY', 'bestFitLineX', 'bestFitLineY', 'r2', 'correlationCoef', 'pValue')) variables <- new("VariableMetadataList", SimpleList( new("VariableMetadata", @@ -315,29 +315,29 @@ test_that("scattergl.dt() returns an appropriately sized data.table", { dt <- scattergl.dt(df, variables, 'raw') expect_is(dt, 'data.table') expect_equal(nrow(dt),12) - expect_equal(names(dt),c('entity.cat3', 'entity.cat4', 'seriesX', 'seriesY')) + expect_equal(names(dt),c('entity.cat4', 'entity.cat3', 'seriesX', 'seriesY', 'correlationCoef', 'pValue')) numericSeriesX <- lapply(dt$seriesX, as.numeric) expect_equal(range(df$entity.contA), range(numericSeriesX)) dt <- scattergl.dt(df, variables, 'smoothedMean') expect_is(dt, 'data.table') expect_equal(nrow(dt),12) - expect_equal(names(dt),c('entity.cat3', 'entity.cat4', 'smoothedMeanX', 'smoothedMeanY', 'smoothedMeanSE', 'smoothedMeanError')) + expect_equal(names(dt),c('entity.cat4', 'entity.cat3', 'smoothedMeanX', 'smoothedMeanY', 'smoothedMeanSE', 'smoothedMeanError', 'correlationCoef', 'pValue')) dt <- scattergl.dt(df, variables, 'smoothedMeanWithRaw') expect_is(dt, 'data.table') expect_equal(nrow(dt),12) - expect_equal(names(dt),c('entity.cat4', 'entity.cat3', 'seriesX', 'seriesY', 'smoothedMeanX', 'smoothedMeanY', 'smoothedMeanSE', 'smoothedMeanError')) + expect_equal(names(dt),c('entity.cat4', 'entity.cat3', 'seriesX', 'seriesY', 'smoothedMeanX', 'smoothedMeanY', 'smoothedMeanSE', 'smoothedMeanError', 'correlationCoef', 'pValue')) dt <- scattergl.dt(df, variables, 'bestFitLineWithRaw') expect_is(dt, 'data.table') expect_equal(nrow(dt),12) - expect_equal(names(dt),c('entity.cat4', 'entity.cat3', 'seriesX', 'seriesY', 'bestFitLineX', 'bestFitLineY', 'r2')) + expect_equal(names(dt),c('entity.cat4', 'entity.cat3', 'seriesX', 'seriesY', 'bestFitLineX', 'bestFitLineY', 'r2', 'correlationCoef', 'pValue')) dt <- scattergl.dt(df, variables, 'density') expect_is(dt, 'data.table') expect_equal(nrow(dt),12) - expect_equal(names(dt),c('entity.cat3', 'entity.cat4', 'densityX', 'densityY')) + expect_equal(names(dt),c('entity.cat4', 'entity.cat3', 'densityX', 'densityY', 'correlationCoef', 'pValue')) variables <- new("VariableMetadataList", SimpleList( new("VariableMetadata", @@ -365,27 +365,27 @@ test_that("scattergl.dt() returns an appropriately sized data.table", { dt <- scattergl.dt(df, variables, 'raw') expect_is(dt, 'data.table') expect_equal(nrow(dt),3) - expect_equal(names(dt),c('entity.cat3', 'seriesX', 'seriesY')) + expect_equal(names(dt),c('entity.cat3', 'seriesX', 'seriesY', 'correlationCoef', 'pValue')) dt <- scattergl.dt(df, variables, 'smoothedMeanWithRaw') expect_is(dt, 'data.table') expect_equal(nrow(dt),3) - expect_equal(names(dt),c('entity.cat3', 'seriesX', 'seriesY', 'smoothedMeanX', 'smoothedMeanY', 'smoothedMeanSE', 'smoothedMeanError')) + expect_equal(names(dt),c('entity.cat3', 'seriesX', 'seriesY', 'smoothedMeanX', 'smoothedMeanY', 'smoothedMeanSE', 'smoothedMeanError', 'correlationCoef', 'pValue')) dt <- scattergl.dt(df, variables, 'smoothedMean') expect_is(dt, 'data.table') expect_equal(nrow(dt),3) - expect_equal(names(dt),c('entity.cat3', 'smoothedMeanX', 'smoothedMeanY', 'smoothedMeanSE', 'smoothedMeanError')) + expect_equal(names(dt),c('entity.cat3', 'smoothedMeanX', 'smoothedMeanY', 'smoothedMeanSE', 'smoothedMeanError', 'correlationCoef', 'pValue')) dt <- scattergl.dt(df, variables, 'bestFitLineWithRaw') expect_is(dt, 'data.table') expect_equal(nrow(dt),3) - expect_equal(names(dt),c('entity.cat3', 'seriesX', 'seriesY', 'bestFitLineX', 'bestFitLineY', 'r2')) + expect_equal(names(dt),c('entity.cat3', 'seriesX', 'seriesY', 'bestFitLineX', 'bestFitLineY', 'r2', 'correlationCoef', 'pValue')) dt <- scattergl.dt(df, variables, 'density') expect_is(dt, 'data.table') expect_equal(nrow(dt),3) - expect_equal(names(dt),c('entity.cat3', 'densityX', 'densityY')) + expect_equal(names(dt),c('entity.cat3', 'densityX', 'densityY', 'correlationCoef', 'pValue')) variables <- new("VariableMetadataList", SimpleList( new("VariableMetadata", @@ -413,27 +413,27 @@ test_that("scattergl.dt() returns an appropriately sized data.table", { dt <- scattergl.dt(df, variables, 'raw') expect_is(dt, 'data.table') expect_equal(nrow(dt),4) - expect_equal(names(dt),c('entity.cat4', 'seriesX', 'seriesY')) + expect_equal(names(dt),c('entity.cat4', 'seriesX', 'seriesY', 'correlationCoef', 'pValue')) dt <- scattergl.dt(df, variables, 'smoothedMeanWithRaw') expect_is(dt, 'data.table') expect_equal(nrow(dt),4) - expect_equal(names(dt),c('entity.cat4', 'seriesX', 'seriesY', 'smoothedMeanX', 'smoothedMeanY', 'smoothedMeanSE', 'smoothedMeanError')) + expect_equal(names(dt),c('entity.cat4', 'seriesX', 'seriesY', 'smoothedMeanX', 'smoothedMeanY', 'smoothedMeanSE', 'smoothedMeanError', 'correlationCoef', 'pValue')) dt <- scattergl.dt(df, variables, 'smoothedMean') expect_is(dt, 'data.table') expect_equal(nrow(dt),4) - expect_equal(names(dt),c('entity.cat4', 'smoothedMeanX', 'smoothedMeanY', 'smoothedMeanSE', 'smoothedMeanError')) + expect_equal(names(dt),c('entity.cat4', 'smoothedMeanX', 'smoothedMeanY', 'smoothedMeanSE', 'smoothedMeanError', 'correlationCoef', 'pValue')) dt <- scattergl.dt(df, variables, 'bestFitLineWithRaw') expect_is(dt, 'data.table') expect_equal(nrow(dt),4) - expect_equal(names(dt),c('entity.cat4', 'seriesX', 'seriesY', 'bestFitLineX', 'bestFitLineY', 'r2')) + expect_equal(names(dt),c('entity.cat4', 'seriesX', 'seriesY', 'bestFitLineX', 'bestFitLineY', 'r2', 'correlationCoef', 'pValue')) dt <- scattergl.dt(df, variables, 'density') expect_is(dt, 'data.table') expect_equal(nrow(dt),4) - expect_equal(names(dt),c('entity.cat4', 'densityX', 'densityY')) + expect_equal(names(dt),c('entity.cat4', 'densityX', 'densityY', 'correlationCoef', 'pValue')) variables <- new("VariableMetadataList", SimpleList( new("VariableMetadata", @@ -454,27 +454,27 @@ test_that("scattergl.dt() returns an appropriately sized data.table", { dt <- scattergl.dt(df, variables, 'raw') expect_is(dt, 'data.table') expect_equal(nrow(dt),1) - expect_equal(names(dt),c('seriesX', 'seriesY')) + expect_equal(names(dt),c('seriesX', 'seriesY', 'correlationCoef', 'pValue')) dt <- scattergl.dt(df, variables, 'smoothedMeanWithRaw') expect_is(dt, 'data.table') expect_equal(nrow(dt),1) - expect_equal(names(dt),c('seriesX', 'seriesY', 'smoothedMeanX', 'smoothedMeanY', 'smoothedMeanSE', 'smoothedMeanError')) + expect_equal(names(dt),c('seriesX', 'seriesY', 'smoothedMeanX', 'smoothedMeanY', 'smoothedMeanSE', 'smoothedMeanError', 'correlationCoef', 'pValue')) dt <- scattergl.dt(df, variables, 'smoothedMean') expect_is(dt, 'data.table') expect_equal(nrow(dt),1) - expect_equal(names(dt),c('smoothedMeanX', 'smoothedMeanY', 'smoothedMeanSE', 'smoothedMeanError')) + expect_equal(names(dt),c('smoothedMeanX', 'smoothedMeanY', 'smoothedMeanSE', 'smoothedMeanError', 'correlationCoef', 'pValue')) dt <- scattergl.dt(df, variables, 'bestFitLineWithRaw') expect_is(dt, 'data.table') expect_equal(nrow(dt),1) - expect_equal(names(dt),c('seriesX', 'seriesY', 'bestFitLineX', 'bestFitLineY', 'r2')) + expect_equal(names(dt),c('seriesX', 'seriesY', 'bestFitLineX', 'bestFitLineY', 'r2', 'correlationCoef', 'pValue')) dt <- scattergl.dt(df, variables, 'density') expect_is(dt, 'data.table') expect_equal(nrow(dt),1) - expect_equal(names(dt),c('densityX', 'densityY')) + expect_equal(names(dt),c('densityX', 'densityY', 'correlationCoef', 'pValue')) variables <- new("VariableMetadataList", SimpleList( new("VariableMetadata", @@ -501,7 +501,7 @@ test_that("scattergl.dt() returns an appropriately sized data.table", { dt <- scattergl.dt(df, variables, 'raw') expect_equal(nrow(dt), 1) - expect_equal(names(dt), c('seriesX', 'seriesY', 'seriesGradientColorscale')) + expect_equal(names(dt), c('seriesX', 'seriesY', 'seriesGradientColorscale', 'correlationCoef', 'pValue')) expect_true(identical(dt$seriesGradientColorscale[[1]], as.character(df$entity.contC))) variables <- new("VariableMetadataList", SimpleList( @@ -544,7 +544,7 @@ test_that("scattergl.dt() returns an appropriately sized data.table", { dt <- scattergl.dt(df, variables, 'raw') expect_equal(nrow(dt), 12) - expect_equal(names(dt), c('panel', 'seriesX', 'seriesY', 'seriesGradientColorscale')) + expect_equal(names(dt), c('panel', 'seriesX', 'seriesY', 'seriesGradientColorscale', 'correlationCoef', 'pValue')) expect_equal(length(dt$seriesGradientColorscale[[1]]), length(dt$seriesX[[1]])) variables <- new("VariableMetadataList", SimpleList( @@ -579,7 +579,7 @@ test_that("scattergl.dt() returns an appropriately sized data.table", { dt <- scattergl.dt(df, variables, 'raw') expect_equal(nrow(dt), 4) - expect_equal(names(dt), c('entity.cat4', 'seriesX', 'seriesY', 'seriesGradientColorscale')) + expect_equal(names(dt), c('entity.cat4', 'seriesX', 'seriesY', 'seriesGradientColorscale', 'correlationCoef', 'pValue')) expect_equal(length(dt$seriesGradientColorscale[[1]]), length(dt$seriesX[[1]])) ## Collection vars @@ -615,7 +615,7 @@ test_that("scattergl.dt() returns an appropriately sized data.table", { dt <- scattergl.dt(df, variables, 'raw') expect_is(dt, 'data.table') expect_equal(nrow(dt), 9) - expect_equal(names(dt), c('entity.cat3', 'entity.collection', 'seriesX', 'seriesY')) + expect_equal(names(dt), c('entity.collection', 'entity.cat3', 'seriesX', 'seriesY', 'correlationCoef', 'pValue')) expect_equal(unique(dt$entity.collection), c('contA','contB','contC')) expect_equal(veupathUtils::findVariableSpecFromPlotRef(attr(dt, 'variables'), 'facet1')@variableId, 'collection') expect_equal(veupathUtils::findVariableSpecFromPlotRef(attr(dt, 'variables'), 'yAxis')@variableId, 'collectionVarValues') @@ -652,7 +652,7 @@ test_that("scattergl.dt() returns an appropriately sized data.table", { dt <- scattergl.dt(df, variables, 'raw') expect_is(dt, 'data.table') expect_equal(nrow(dt), 9) - expect_equal(names(dt), c('panel', 'seriesX', 'seriesY')) + expect_equal(names(dt), c('panel', 'seriesX', 'seriesY', 'correlationCoef', 'pValue')) expect_equal(dt$panel[1], 'cat3_a.||.contA') expect_equal(veupathUtils::findVariableSpecFromPlotRef(attr(dt, 'variables'), 'facet2')@variableId, 'collection') expect_equal(veupathUtils::findVariableSpecFromPlotRef(attr(dt, 'variables'), 'yAxis')@variableId, 'collectionVarValues') @@ -692,7 +692,7 @@ test_that("scattergl.dt() returns an appropriately sized data.table", { dt <- scattergl.dt(df, variables, 'raw') expect_is(dt, 'data.table') expect_equal(nrow(dt), 9) - expect_equal(names(dt), c('entity.collection', 'entity.cat3', 'seriesX', 'seriesY')) + expect_equal(names(dt), c('entity.cat3', 'entity.collection', 'seriesX', 'seriesY', 'correlationCoef', 'pValue')) expect_equal(unique(dt$entity.collection), c('contA','contB','contC')) expect_equal(veupathUtils::findVariableSpecFromPlotRef(attr(dt, 'variables'), 'overlay')@variableId, 'collection') expect_equal(veupathUtils::findVariableSpecFromPlotRef(attr(dt, 'variables'), 'yAxis')@variableId, 'collectionVarValues') @@ -734,7 +734,7 @@ test_that("scattergl.dt() returns an appropriately sized data.table", { dt <- scattergl.dt(df, variables, 'raw') expect_is(dt, 'data.table') expect_equal(nrow(dt), 3) - expect_equal(names(dt), c('entity.collection', 'entity.cat3', 'seriesX', 'seriesY')) + expect_equal(names(dt), c('entity.cat3', 'entity.collection', 'seriesX', 'seriesY', 'correlationCoef', 'pValue')) expect_equal(unique(dt$entity.collection), c('contB')) expect_equal(veupathUtils::findVariableSpecFromPlotRef(attr(dt, 'variables'), 'overlay')@variableId, 'collection') expect_equal(veupathUtils::findVariableSpecFromPlotRef(attr(dt, 'variables'), 'yAxis')@variableId, 'collectionVarValues') @@ -772,7 +772,7 @@ test_that("scattergl.dt() returns an appropriately sized data.table", { dt <- scattergl.dt(df, variables, 'raw') expect_equal(nrow(dt), 9) - expect_equal(names(dt), c('entity.cat3', 'entity.factor3','seriesX','seriesY')) + expect_equal(names(dt), c('entity.factor3', 'entity.cat3','seriesX','seriesY', 'correlationCoef', 'pValue')) expect_equal(class(dt$entity.factor3), 'character') variables <- new("VariableMetadataList", SimpleList( @@ -807,7 +807,7 @@ test_that("scattergl.dt() returns an appropriately sized data.table", { dt <- scattergl.dt(df, variables, 'raw') expect_equal(nrow(dt), 18) - expect_equal(names(dt), c('panel','seriesX','seriesY')) + expect_equal(names(dt), c('panel','seriesX','seriesY', 'correlationCoef', 'pValue')) expect_equal(class(dt$panel), 'character') variables <- new("VariableMetadataList", SimpleList( @@ -842,7 +842,7 @@ test_that("scattergl.dt() returns an appropriately sized data.table", { dt <- scattergl.dt(df, variables, 'raw') expect_equal(nrow(dt), 9) - expect_equal(names(dt), c('panel','seriesX','seriesY')) + expect_equal(names(dt), c('panel','seriesX','seriesY', 'correlationCoef', 'pValue')) expect_equal(class(dt$panel), 'character') }) @@ -886,11 +886,11 @@ test_that("scattergl() returns appropriately formatted json", { expect_equal(names(jsonList),c('scatterplot','sampleSizeTable', 'completeCasesTable')) expect_equal(names(jsonList$scatterplot),c('data','config')) - expect_equal(names(jsonList$scatterplot$data),c('facetVariableDetails','overlayVariableDetails','seriesX','seriesY','smoothedMeanX','smoothedMeanY','smoothedMeanSE','smoothedMeanError')) + expect_equal(names(jsonList$scatterplot$data),c('facetVariableDetails','overlayVariableDetails','seriesX','seriesY','smoothedMeanX','smoothedMeanY','smoothedMeanSE','smoothedMeanError','correlationCoef','pValue')) expect_equal(names(jsonList$scatterplot$data$facetVariableDetails[[1]]),c('variableId','entityId','value')) expect_equal(length(jsonList$scatterplot$data$facetVariableDetails), 12) expect_equal(jsonList$scatterplot$data$facetVariableDetails[[1]]$variableId, 'cat4') - expect_equal(names(jsonList$scatterplot$config),c('variables','completeCasesAllVars','completeCasesAxesVars')) + expect_equal(names(jsonList$scatterplot$config),c('variables','completeCasesAllVars','completeCasesAxesVars','correlationMethod')) expect_equal(names(jsonList$scatterplot$config$variables),c("variableClass","variableSpec","plotReference","dataType","dataShape","isCollection","imputeZero","hasStudyDependentVocabulary")) expect_equal(jsonList$scatterplot$config$variables$variableSpec$variableId, c("contB","contA","cat3","cat4")) expect_equal(names(jsonList$sampleSizeTable),c('overlayVariableDetails','facetVariableDetails','size')) @@ -937,11 +937,11 @@ test_that("scattergl() returns appropriately formatted json", { expect_equal(names(jsonList),c('scatterplot','sampleSizeTable', 'completeCasesTable')) expect_equal(names(jsonList$scatterplot),c('data','config')) - expect_equal(names(jsonList$scatterplot$data),c('facetVariableDetails','seriesX','seriesY','seriesGradientColorscale')) + expect_equal(names(jsonList$scatterplot$data),c('facetVariableDetails','seriesX','seriesY','seriesGradientColorscale','correlationCoef','pValue')) expect_equal(names(jsonList$scatterplot$data$facetVariableDetails[[1]]),c('variableId','entityId','value')) expect_equal(length(jsonList$scatterplot$data$facetVariableDetails), 4) expect_equal(jsonList$scatterplot$data$facetVariableDetails[[1]]$variableId, 'cat4') - expect_equal(names(jsonList$scatterplot$config),c('variables','completeCasesAllVars','completeCasesAxesVars')) + expect_equal(names(jsonList$scatterplot$config),c('variables','completeCasesAllVars','completeCasesAxesVars','correlationMethod')) expect_equal(names(jsonList$scatterplot$config$variables$variableSpec),c('variableId','entityId')) expect_equal(jsonList$scatterplot$config$variables$variableSpec$variableId[jsonList$scatterplot$config$variables$plotReference == 'overlay'], 'contC') expect_equal(names(jsonList$sampleSizeTable),c('facetVariableDetails','size')) @@ -988,11 +988,11 @@ test_that("scattergl() returns appropriately formatted json", { expect_equal(names(jsonList),c('scatterplot','sampleSizeTable', 'completeCasesTable')) expect_equal(names(jsonList$scatterplot),c('data','config')) - expect_equal(names(jsonList$scatterplot$data),c('facetVariableDetails','overlayVariableDetails','seriesX','seriesY','smoothedMeanX','smoothedMeanY','smoothedMeanSE','smoothedMeanError')) + expect_equal(names(jsonList$scatterplot$data),c('facetVariableDetails','overlayVariableDetails','seriesX','seriesY','smoothedMeanX','smoothedMeanY','smoothedMeanSE','smoothedMeanError','correlationCoef','pValue')) expect_equal(names(jsonList$scatterplot$data$facetVariableDetails[[1]]),c('variableId','entityId','value')) expect_equal(length(jsonList$scatterplot$data$facetVariableDetails), 24) expect_equal(jsonList$scatterplot$data$facetVariableDetails[[1]]$variableId, 'cat4') - expect_equal(names(jsonList$scatterplot$config),c('variables','completeCasesAllVars','completeCasesAxesVars')) + expect_equal(names(jsonList$scatterplot$config),c('variables','completeCasesAllVars','completeCasesAxesVars','correlationMethod')) expect_equal(names(jsonList$scatterplot$config$variables),c("variableClass","variableSpec","plotReference","dataType","dataShape","isCollection","imputeZero","hasStudyDependentVocabulary")) expect_equal(jsonList$scatterplot$config$variables$variableSpec$variableId, c("contB","contA","int6","cat4")) expect_equal(names(jsonList$sampleSizeTable),c('overlayVariableDetails','facetVariableDetails','size')) @@ -1043,9 +1043,9 @@ test_that("scattergl() returns appropriately formatted json", { expect_equal(names(jsonList),c('scatterplot','sampleSizeTable', 'completeCasesTable')) expect_equal(names(jsonList$scatterplot),c('data','config')) - expect_equal(names(jsonList$scatterplot$data),c('overlayVariableDetails','facetVariableDetails','seriesX','seriesY')) + expect_equal(names(jsonList$scatterplot$data),c('facetVariableDetails','overlayVariableDetails','seriesX','seriesY','correlationCoef','pValue')) expect_equal(names(jsonList$scatterplot$data$overlayVariableDetails),c('variableId','entityId','value')) - expect_equal(names(jsonList$scatterplot$config),c('variables','completeCasesAllVars','completeCasesAxesVars')) + expect_equal(names(jsonList$scatterplot$config),c('variables','completeCasesAllVars','completeCasesAxesVars','correlationMethod')) expect_equal(jsonList$scatterplot$config$completeCasesAllVars, nrow(df)) expect_equal(jsonList$scatterplot$config$completeCasesAxesVars, nrow(df)) expect_equal(names(jsonList$sampleSizeTable),c('overlayVariableDetails', 'facetVariableDetails','size')) @@ -1094,9 +1094,9 @@ test_that("scattergl() returns appropriately formatted json", { expect_equal(names(jsonList),c('scatterplot','sampleSizeTable', 'completeCasesTable')) expect_equal(names(jsonList$scatterplot),c('data','config')) - expect_equal(names(jsonList$scatterplot$data),c('facetVariableDetails','seriesX','seriesY')) + expect_equal(names(jsonList$scatterplot$data),c('facetVariableDetails','seriesX','seriesY','correlationCoef','pValue')) expect_equal(names(jsonList$scatterplot$data$facetVariableDetails[[1]]),c('variableId','entityId','value','displayLabel')) - expect_equal(names(jsonList$scatterplot$config),c('variables','completeCasesAllVars','completeCasesAxesVars')) + expect_equal(names(jsonList$scatterplot$config),c('variables','completeCasesAllVars','completeCasesAxesVars','correlationMethod')) expect_equal(jsonList$scatterplot$config$completeCasesAllVars, nrow(df)) expect_equal(jsonList$scatterplot$config$completeCasesAxesVars, nrow(df)) expect_equal(names(jsonList$sampleSizeTable),c('facetVariableDetails','size')) @@ -1112,9 +1112,9 @@ test_that("scattergl() returns appropriately formatted json", { expect_equal(names(jsonList),c('scatterplot')) expect_equal(names(jsonList$scatterplot),c('data','config')) - expect_equal(names(jsonList$scatterplot$data),c('facetVariableDetails','seriesX','seriesY')) + expect_equal(names(jsonList$scatterplot$data),c('facetVariableDetails','seriesX','seriesY','correlationCoef','pValue')) expect_equal(names(jsonList$scatterplot$data$facetVariableDetails[[1]]),c('variableId','entityId','value','displayLabel')) - expect_equal(names(jsonList$scatterplot$config),c('variables')) + expect_equal(names(jsonList$scatterplot$config),c('variables','correlationMethod')) @@ -1157,9 +1157,9 @@ test_that("scattergl() returns appropriately formatted json", { expect_equal(names(jsonList),c('scatterplot','sampleSizeTable', 'completeCasesTable')) expect_equal(names(jsonList$scatterplot),c('data','config')) - expect_equal(names(jsonList$scatterplot$data),c('overlayVariableDetails','facetVariableDetails','seriesX','seriesY')) + expect_equal(names(jsonList$scatterplot$data),c('facetVariableDetails','overlayVariableDetails','seriesX','seriesY','correlationCoef','pValue')) expect_equal(names(jsonList$scatterplot$data$overlayVariableDetails),c('variableId','entityId','value','displayLabel')) - expect_equal(names(jsonList$scatterplot$config),c('variables','completeCasesAllVars','completeCasesAxesVars')) + expect_equal(names(jsonList$scatterplot$config),c('variables','completeCasesAllVars','completeCasesAxesVars','correlationMethod')) expect_equal(jsonList$scatterplot$config$completeCasesAllVars, nrow(df)) expect_equal(jsonList$scatterplot$config$completeCasesAxesVars, nrow(df)) expect_equal(names(jsonList$sampleSizeTable),c('overlayVariableDetails', 'facetVariableDetails','size')) @@ -1338,7 +1338,7 @@ test_that("scattergl.dt() returns correct information about missing data", { dt <- scattergl.dt(df, variables, 'raw', evilMode = 'strataVariables') expect_equal(nrow(dt), 2) - expect_equal(names(dt), c('seriesX', 'seriesY', 'seriesGradientColorscale')) + expect_equal(names(dt), c('seriesX', 'seriesY', 'seriesGradientColorscale', 'correlationCoef', 'pValue')) expect_equal(lapply(dt$seriesGradientColorscale, length), lapply(dt$seriesX, length)) expect_true(all(is.na(dt$seriesGradientColorscale[[2]]))) @@ -1374,7 +1374,7 @@ test_that("scattergl.dt() returns correct information about missing data", { )) dt <- scattergl.dt(df, variables, 'raw', evilMode = 'strataVariables') - expect_equal(names(dt), c('entity.cat4', 'seriesX', 'seriesY', 'seriesGradientColorscale')) + expect_equal(names(dt), c('entity.cat4', 'seriesX', 'seriesY', 'seriesGradientColorscale', 'correlationCoef', 'pValue')) expect_equal(lapply(dt$seriesGradientColorscale, length), lapply(dt$seriesX, length)) expect_true(all(is.na(dt$seriesGradientColorscale[[2]]))) @@ -1384,12 +1384,12 @@ test_that("scattergl.dt() returns correct information about missing data", { expect_equal(names(jsonList),c('scatterplot','sampleSizeTable', 'completeCasesTable')) expect_equal(names(jsonList$scatterplot),c('data','config')) - expect_equal(names(jsonList$scatterplot$data),c('facetVariableDetails','seriesX','seriesY','seriesGradientColorscale')) + expect_equal(names(jsonList$scatterplot$data),c('facetVariableDetails','seriesX','seriesY','seriesGradientColorscale','correlationCoef','pValue')) expect_equal(names(jsonList$scatterplot$data$facetVariableDetails[[1]]),c('variableId','entityId','value')) expect_equal(length(jsonList$scatterplot$data$facetVariableDetails), 10) expect_equal(jsonList$scatterplot$data$facetVariableDetails[[1]]$variableId, 'cat4') expect_true(all(is.na(jsonList$scatterplot$data$seriesGradientColorscale[[2]]))) # should be null in the json file, but fromJSON converts to NA - expect_equal(names(jsonList$scatterplot$config),c('variables','completeCasesAllVars','completeCasesAxesVars')) + expect_equal(names(jsonList$scatterplot$config),c('variables','completeCasesAllVars','completeCasesAxesVars','correlationMethod')) expect_equal(names(jsonList$scatterplot$config$variables$variableSpec),c('variableId','entityId')) expect_equal(jsonList$scatterplot$config$variables$variableSpec$variableId[jsonList$scatterplot$config$variables$plotReference == 'overlay'], 'contC') expect_equal(names(jsonList$sampleSizeTable),c('facetVariableDetails','size')) From 56d84993ce4074897af975ce68ba4c23666868ee Mon Sep 17 00:00:00 2001 From: Danielle Callan Date: Wed, 1 May 2024 12:37:01 -0400 Subject: [PATCH 3/6] Revert "update scatter tests to include correlation coefs" This reverts commit 5e23942b928d7b7799a824d4c1f7e5d1587c73e3. --- tests/testthat/test-scattergl.R | 108 ++++++++++++++++---------------- 1 file changed, 54 insertions(+), 54 deletions(-) diff --git a/tests/testthat/test-scattergl.R b/tests/testthat/test-scattergl.R index ba9ae06..58a342f 100644 --- a/tests/testthat/test-scattergl.R +++ b/tests/testthat/test-scattergl.R @@ -114,7 +114,7 @@ test_that("scattergl.dt() returns a valid plot.data scatter object", { expect_is(dt, 'plot.data') expect_is(dt, 'scatterplot') namedAttrList <- getPDAttributes(dt) - expect_equal(names(namedAttrList),c('variables', 'completeCasesAllVars','completeCasesAxesVars','completeCasesTable','sampleSizeTable','correlationMethod')) + expect_equal(names(namedAttrList),c('variables', 'completeCasesAllVars','completeCasesAxesVars','completeCasesTable','sampleSizeTable')) completeCases <- completeCasesTable(dt) expect_equal(names(completeCases), c('variableDetails','completeCases')) expect_equal(nrow(completeCases), 4) @@ -127,7 +127,7 @@ test_that("scattergl.dt() returns a valid plot.data scatter object", { expect_is(dt, 'plot.data') expect_is(dt, 'scatterplot') namedAttrList <- getPDAttributes(dt) - expect_equal(names(namedAttrList),c('variables','correlationMethod')) + expect_equal(names(namedAttrList),c('variables')) }) @@ -264,23 +264,23 @@ test_that("scattergl.dt() returns an appropriately sized data.table", { dt <- scattergl.dt(df, variables, 'raw') expect_is(dt, 'data.table') expect_equal(nrow(dt),12) - expect_equal(names(dt),c('entity.cat4', 'entity.cat3', 'seriesX', 'seriesY', 'correlationCoef', 'pValue')) + expect_equal(names(dt),c('entity.cat3', 'entity.cat4', 'seriesX', 'seriesY')) expect_equal(as.character(range(df$entity.dateA)), range(dt$seriesX)) dt <- scattergl.dt(df, variables, 'smoothedMean') expect_is(dt, 'data.table') expect_equal(nrow(dt),12) - expect_equal(names(dt),c('entity.cat4', 'entity.cat3', 'smoothedMeanX', 'smoothedMeanY', 'smoothedMeanSE', 'smoothedMeanError', 'correlationCoef', 'pValue')) + expect_equal(names(dt),c('entity.cat3', 'entity.cat4', 'smoothedMeanX', 'smoothedMeanY', 'smoothedMeanSE', 'smoothedMeanError')) dt <- scattergl.dt(df, variables, 'smoothedMeanWithRaw') expect_is(dt, 'data.table') expect_equal(nrow(dt),12) - expect_equal(names(dt),c('entity.cat4', 'entity.cat3', 'seriesX', 'seriesY', 'smoothedMeanX', 'smoothedMeanY', 'smoothedMeanSE', 'smoothedMeanError', 'correlationCoef', 'pValue')) + expect_equal(names(dt),c('entity.cat4', 'entity.cat3', 'seriesX', 'seriesY', 'smoothedMeanX', 'smoothedMeanY', 'smoothedMeanSE', 'smoothedMeanError')) dt <- scattergl.dt(df, variables, 'bestFitLineWithRaw') expect_is(dt, 'data.table') expect_equal(nrow(dt),12) - expect_equal(names(dt),c('entity.cat4', 'entity.cat3', 'seriesX', 'seriesY', 'bestFitLineX', 'bestFitLineY', 'r2', 'correlationCoef', 'pValue')) + expect_equal(names(dt),c('entity.cat4', 'entity.cat3', 'seriesX', 'seriesY', 'bestFitLineX', 'bestFitLineY', 'r2')) variables <- new("VariableMetadataList", SimpleList( new("VariableMetadata", @@ -315,29 +315,29 @@ test_that("scattergl.dt() returns an appropriately sized data.table", { dt <- scattergl.dt(df, variables, 'raw') expect_is(dt, 'data.table') expect_equal(nrow(dt),12) - expect_equal(names(dt),c('entity.cat4', 'entity.cat3', 'seriesX', 'seriesY', 'correlationCoef', 'pValue')) + expect_equal(names(dt),c('entity.cat3', 'entity.cat4', 'seriesX', 'seriesY')) numericSeriesX <- lapply(dt$seriesX, as.numeric) expect_equal(range(df$entity.contA), range(numericSeriesX)) dt <- scattergl.dt(df, variables, 'smoothedMean') expect_is(dt, 'data.table') expect_equal(nrow(dt),12) - expect_equal(names(dt),c('entity.cat4', 'entity.cat3', 'smoothedMeanX', 'smoothedMeanY', 'smoothedMeanSE', 'smoothedMeanError', 'correlationCoef', 'pValue')) + expect_equal(names(dt),c('entity.cat3', 'entity.cat4', 'smoothedMeanX', 'smoothedMeanY', 'smoothedMeanSE', 'smoothedMeanError')) dt <- scattergl.dt(df, variables, 'smoothedMeanWithRaw') expect_is(dt, 'data.table') expect_equal(nrow(dt),12) - expect_equal(names(dt),c('entity.cat4', 'entity.cat3', 'seriesX', 'seriesY', 'smoothedMeanX', 'smoothedMeanY', 'smoothedMeanSE', 'smoothedMeanError', 'correlationCoef', 'pValue')) + expect_equal(names(dt),c('entity.cat4', 'entity.cat3', 'seriesX', 'seriesY', 'smoothedMeanX', 'smoothedMeanY', 'smoothedMeanSE', 'smoothedMeanError')) dt <- scattergl.dt(df, variables, 'bestFitLineWithRaw') expect_is(dt, 'data.table') expect_equal(nrow(dt),12) - expect_equal(names(dt),c('entity.cat4', 'entity.cat3', 'seriesX', 'seriesY', 'bestFitLineX', 'bestFitLineY', 'r2', 'correlationCoef', 'pValue')) + expect_equal(names(dt),c('entity.cat4', 'entity.cat3', 'seriesX', 'seriesY', 'bestFitLineX', 'bestFitLineY', 'r2')) dt <- scattergl.dt(df, variables, 'density') expect_is(dt, 'data.table') expect_equal(nrow(dt),12) - expect_equal(names(dt),c('entity.cat4', 'entity.cat3', 'densityX', 'densityY', 'correlationCoef', 'pValue')) + expect_equal(names(dt),c('entity.cat3', 'entity.cat4', 'densityX', 'densityY')) variables <- new("VariableMetadataList", SimpleList( new("VariableMetadata", @@ -365,27 +365,27 @@ test_that("scattergl.dt() returns an appropriately sized data.table", { dt <- scattergl.dt(df, variables, 'raw') expect_is(dt, 'data.table') expect_equal(nrow(dt),3) - expect_equal(names(dt),c('entity.cat3', 'seriesX', 'seriesY', 'correlationCoef', 'pValue')) + expect_equal(names(dt),c('entity.cat3', 'seriesX', 'seriesY')) dt <- scattergl.dt(df, variables, 'smoothedMeanWithRaw') expect_is(dt, 'data.table') expect_equal(nrow(dt),3) - expect_equal(names(dt),c('entity.cat3', 'seriesX', 'seriesY', 'smoothedMeanX', 'smoothedMeanY', 'smoothedMeanSE', 'smoothedMeanError', 'correlationCoef', 'pValue')) + expect_equal(names(dt),c('entity.cat3', 'seriesX', 'seriesY', 'smoothedMeanX', 'smoothedMeanY', 'smoothedMeanSE', 'smoothedMeanError')) dt <- scattergl.dt(df, variables, 'smoothedMean') expect_is(dt, 'data.table') expect_equal(nrow(dt),3) - expect_equal(names(dt),c('entity.cat3', 'smoothedMeanX', 'smoothedMeanY', 'smoothedMeanSE', 'smoothedMeanError', 'correlationCoef', 'pValue')) + expect_equal(names(dt),c('entity.cat3', 'smoothedMeanX', 'smoothedMeanY', 'smoothedMeanSE', 'smoothedMeanError')) dt <- scattergl.dt(df, variables, 'bestFitLineWithRaw') expect_is(dt, 'data.table') expect_equal(nrow(dt),3) - expect_equal(names(dt),c('entity.cat3', 'seriesX', 'seriesY', 'bestFitLineX', 'bestFitLineY', 'r2', 'correlationCoef', 'pValue')) + expect_equal(names(dt),c('entity.cat3', 'seriesX', 'seriesY', 'bestFitLineX', 'bestFitLineY', 'r2')) dt <- scattergl.dt(df, variables, 'density') expect_is(dt, 'data.table') expect_equal(nrow(dt),3) - expect_equal(names(dt),c('entity.cat3', 'densityX', 'densityY', 'correlationCoef', 'pValue')) + expect_equal(names(dt),c('entity.cat3', 'densityX', 'densityY')) variables <- new("VariableMetadataList", SimpleList( new("VariableMetadata", @@ -413,27 +413,27 @@ test_that("scattergl.dt() returns an appropriately sized data.table", { dt <- scattergl.dt(df, variables, 'raw') expect_is(dt, 'data.table') expect_equal(nrow(dt),4) - expect_equal(names(dt),c('entity.cat4', 'seriesX', 'seriesY', 'correlationCoef', 'pValue')) + expect_equal(names(dt),c('entity.cat4', 'seriesX', 'seriesY')) dt <- scattergl.dt(df, variables, 'smoothedMeanWithRaw') expect_is(dt, 'data.table') expect_equal(nrow(dt),4) - expect_equal(names(dt),c('entity.cat4', 'seriesX', 'seriesY', 'smoothedMeanX', 'smoothedMeanY', 'smoothedMeanSE', 'smoothedMeanError', 'correlationCoef', 'pValue')) + expect_equal(names(dt),c('entity.cat4', 'seriesX', 'seriesY', 'smoothedMeanX', 'smoothedMeanY', 'smoothedMeanSE', 'smoothedMeanError')) dt <- scattergl.dt(df, variables, 'smoothedMean') expect_is(dt, 'data.table') expect_equal(nrow(dt),4) - expect_equal(names(dt),c('entity.cat4', 'smoothedMeanX', 'smoothedMeanY', 'smoothedMeanSE', 'smoothedMeanError', 'correlationCoef', 'pValue')) + expect_equal(names(dt),c('entity.cat4', 'smoothedMeanX', 'smoothedMeanY', 'smoothedMeanSE', 'smoothedMeanError')) dt <- scattergl.dt(df, variables, 'bestFitLineWithRaw') expect_is(dt, 'data.table') expect_equal(nrow(dt),4) - expect_equal(names(dt),c('entity.cat4', 'seriesX', 'seriesY', 'bestFitLineX', 'bestFitLineY', 'r2', 'correlationCoef', 'pValue')) + expect_equal(names(dt),c('entity.cat4', 'seriesX', 'seriesY', 'bestFitLineX', 'bestFitLineY', 'r2')) dt <- scattergl.dt(df, variables, 'density') expect_is(dt, 'data.table') expect_equal(nrow(dt),4) - expect_equal(names(dt),c('entity.cat4', 'densityX', 'densityY', 'correlationCoef', 'pValue')) + expect_equal(names(dt),c('entity.cat4', 'densityX', 'densityY')) variables <- new("VariableMetadataList", SimpleList( new("VariableMetadata", @@ -454,27 +454,27 @@ test_that("scattergl.dt() returns an appropriately sized data.table", { dt <- scattergl.dt(df, variables, 'raw') expect_is(dt, 'data.table') expect_equal(nrow(dt),1) - expect_equal(names(dt),c('seriesX', 'seriesY', 'correlationCoef', 'pValue')) + expect_equal(names(dt),c('seriesX', 'seriesY')) dt <- scattergl.dt(df, variables, 'smoothedMeanWithRaw') expect_is(dt, 'data.table') expect_equal(nrow(dt),1) - expect_equal(names(dt),c('seriesX', 'seriesY', 'smoothedMeanX', 'smoothedMeanY', 'smoothedMeanSE', 'smoothedMeanError', 'correlationCoef', 'pValue')) + expect_equal(names(dt),c('seriesX', 'seriesY', 'smoothedMeanX', 'smoothedMeanY', 'smoothedMeanSE', 'smoothedMeanError')) dt <- scattergl.dt(df, variables, 'smoothedMean') expect_is(dt, 'data.table') expect_equal(nrow(dt),1) - expect_equal(names(dt),c('smoothedMeanX', 'smoothedMeanY', 'smoothedMeanSE', 'smoothedMeanError', 'correlationCoef', 'pValue')) + expect_equal(names(dt),c('smoothedMeanX', 'smoothedMeanY', 'smoothedMeanSE', 'smoothedMeanError')) dt <- scattergl.dt(df, variables, 'bestFitLineWithRaw') expect_is(dt, 'data.table') expect_equal(nrow(dt),1) - expect_equal(names(dt),c('seriesX', 'seriesY', 'bestFitLineX', 'bestFitLineY', 'r2', 'correlationCoef', 'pValue')) + expect_equal(names(dt),c('seriesX', 'seriesY', 'bestFitLineX', 'bestFitLineY', 'r2')) dt <- scattergl.dt(df, variables, 'density') expect_is(dt, 'data.table') expect_equal(nrow(dt),1) - expect_equal(names(dt),c('densityX', 'densityY', 'correlationCoef', 'pValue')) + expect_equal(names(dt),c('densityX', 'densityY')) variables <- new("VariableMetadataList", SimpleList( new("VariableMetadata", @@ -501,7 +501,7 @@ test_that("scattergl.dt() returns an appropriately sized data.table", { dt <- scattergl.dt(df, variables, 'raw') expect_equal(nrow(dt), 1) - expect_equal(names(dt), c('seriesX', 'seriesY', 'seriesGradientColorscale', 'correlationCoef', 'pValue')) + expect_equal(names(dt), c('seriesX', 'seriesY', 'seriesGradientColorscale')) expect_true(identical(dt$seriesGradientColorscale[[1]], as.character(df$entity.contC))) variables <- new("VariableMetadataList", SimpleList( @@ -544,7 +544,7 @@ test_that("scattergl.dt() returns an appropriately sized data.table", { dt <- scattergl.dt(df, variables, 'raw') expect_equal(nrow(dt), 12) - expect_equal(names(dt), c('panel', 'seriesX', 'seriesY', 'seriesGradientColorscale', 'correlationCoef', 'pValue')) + expect_equal(names(dt), c('panel', 'seriesX', 'seriesY', 'seriesGradientColorscale')) expect_equal(length(dt$seriesGradientColorscale[[1]]), length(dt$seriesX[[1]])) variables <- new("VariableMetadataList", SimpleList( @@ -579,7 +579,7 @@ test_that("scattergl.dt() returns an appropriately sized data.table", { dt <- scattergl.dt(df, variables, 'raw') expect_equal(nrow(dt), 4) - expect_equal(names(dt), c('entity.cat4', 'seriesX', 'seriesY', 'seriesGradientColorscale', 'correlationCoef', 'pValue')) + expect_equal(names(dt), c('entity.cat4', 'seriesX', 'seriesY', 'seriesGradientColorscale')) expect_equal(length(dt$seriesGradientColorscale[[1]]), length(dt$seriesX[[1]])) ## Collection vars @@ -615,7 +615,7 @@ test_that("scattergl.dt() returns an appropriately sized data.table", { dt <- scattergl.dt(df, variables, 'raw') expect_is(dt, 'data.table') expect_equal(nrow(dt), 9) - expect_equal(names(dt), c('entity.collection', 'entity.cat3', 'seriesX', 'seriesY', 'correlationCoef', 'pValue')) + expect_equal(names(dt), c('entity.cat3', 'entity.collection', 'seriesX', 'seriesY')) expect_equal(unique(dt$entity.collection), c('contA','contB','contC')) expect_equal(veupathUtils::findVariableSpecFromPlotRef(attr(dt, 'variables'), 'facet1')@variableId, 'collection') expect_equal(veupathUtils::findVariableSpecFromPlotRef(attr(dt, 'variables'), 'yAxis')@variableId, 'collectionVarValues') @@ -652,7 +652,7 @@ test_that("scattergl.dt() returns an appropriately sized data.table", { dt <- scattergl.dt(df, variables, 'raw') expect_is(dt, 'data.table') expect_equal(nrow(dt), 9) - expect_equal(names(dt), c('panel', 'seriesX', 'seriesY', 'correlationCoef', 'pValue')) + expect_equal(names(dt), c('panel', 'seriesX', 'seriesY')) expect_equal(dt$panel[1], 'cat3_a.||.contA') expect_equal(veupathUtils::findVariableSpecFromPlotRef(attr(dt, 'variables'), 'facet2')@variableId, 'collection') expect_equal(veupathUtils::findVariableSpecFromPlotRef(attr(dt, 'variables'), 'yAxis')@variableId, 'collectionVarValues') @@ -692,7 +692,7 @@ test_that("scattergl.dt() returns an appropriately sized data.table", { dt <- scattergl.dt(df, variables, 'raw') expect_is(dt, 'data.table') expect_equal(nrow(dt), 9) - expect_equal(names(dt), c('entity.cat3', 'entity.collection', 'seriesX', 'seriesY', 'correlationCoef', 'pValue')) + expect_equal(names(dt), c('entity.collection', 'entity.cat3', 'seriesX', 'seriesY')) expect_equal(unique(dt$entity.collection), c('contA','contB','contC')) expect_equal(veupathUtils::findVariableSpecFromPlotRef(attr(dt, 'variables'), 'overlay')@variableId, 'collection') expect_equal(veupathUtils::findVariableSpecFromPlotRef(attr(dt, 'variables'), 'yAxis')@variableId, 'collectionVarValues') @@ -734,7 +734,7 @@ test_that("scattergl.dt() returns an appropriately sized data.table", { dt <- scattergl.dt(df, variables, 'raw') expect_is(dt, 'data.table') expect_equal(nrow(dt), 3) - expect_equal(names(dt), c('entity.cat3', 'entity.collection', 'seriesX', 'seriesY', 'correlationCoef', 'pValue')) + expect_equal(names(dt), c('entity.collection', 'entity.cat3', 'seriesX', 'seriesY')) expect_equal(unique(dt$entity.collection), c('contB')) expect_equal(veupathUtils::findVariableSpecFromPlotRef(attr(dt, 'variables'), 'overlay')@variableId, 'collection') expect_equal(veupathUtils::findVariableSpecFromPlotRef(attr(dt, 'variables'), 'yAxis')@variableId, 'collectionVarValues') @@ -772,7 +772,7 @@ test_that("scattergl.dt() returns an appropriately sized data.table", { dt <- scattergl.dt(df, variables, 'raw') expect_equal(nrow(dt), 9) - expect_equal(names(dt), c('entity.factor3', 'entity.cat3','seriesX','seriesY', 'correlationCoef', 'pValue')) + expect_equal(names(dt), c('entity.cat3', 'entity.factor3','seriesX','seriesY')) expect_equal(class(dt$entity.factor3), 'character') variables <- new("VariableMetadataList", SimpleList( @@ -807,7 +807,7 @@ test_that("scattergl.dt() returns an appropriately sized data.table", { dt <- scattergl.dt(df, variables, 'raw') expect_equal(nrow(dt), 18) - expect_equal(names(dt), c('panel','seriesX','seriesY', 'correlationCoef', 'pValue')) + expect_equal(names(dt), c('panel','seriesX','seriesY')) expect_equal(class(dt$panel), 'character') variables <- new("VariableMetadataList", SimpleList( @@ -842,7 +842,7 @@ test_that("scattergl.dt() returns an appropriately sized data.table", { dt <- scattergl.dt(df, variables, 'raw') expect_equal(nrow(dt), 9) - expect_equal(names(dt), c('panel','seriesX','seriesY', 'correlationCoef', 'pValue')) + expect_equal(names(dt), c('panel','seriesX','seriesY')) expect_equal(class(dt$panel), 'character') }) @@ -886,11 +886,11 @@ test_that("scattergl() returns appropriately formatted json", { expect_equal(names(jsonList),c('scatterplot','sampleSizeTable', 'completeCasesTable')) expect_equal(names(jsonList$scatterplot),c('data','config')) - expect_equal(names(jsonList$scatterplot$data),c('facetVariableDetails','overlayVariableDetails','seriesX','seriesY','smoothedMeanX','smoothedMeanY','smoothedMeanSE','smoothedMeanError','correlationCoef','pValue')) + expect_equal(names(jsonList$scatterplot$data),c('facetVariableDetails','overlayVariableDetails','seriesX','seriesY','smoothedMeanX','smoothedMeanY','smoothedMeanSE','smoothedMeanError')) expect_equal(names(jsonList$scatterplot$data$facetVariableDetails[[1]]),c('variableId','entityId','value')) expect_equal(length(jsonList$scatterplot$data$facetVariableDetails), 12) expect_equal(jsonList$scatterplot$data$facetVariableDetails[[1]]$variableId, 'cat4') - expect_equal(names(jsonList$scatterplot$config),c('variables','completeCasesAllVars','completeCasesAxesVars','correlationMethod')) + expect_equal(names(jsonList$scatterplot$config),c('variables','completeCasesAllVars','completeCasesAxesVars')) expect_equal(names(jsonList$scatterplot$config$variables),c("variableClass","variableSpec","plotReference","dataType","dataShape","isCollection","imputeZero","hasStudyDependentVocabulary")) expect_equal(jsonList$scatterplot$config$variables$variableSpec$variableId, c("contB","contA","cat3","cat4")) expect_equal(names(jsonList$sampleSizeTable),c('overlayVariableDetails','facetVariableDetails','size')) @@ -937,11 +937,11 @@ test_that("scattergl() returns appropriately formatted json", { expect_equal(names(jsonList),c('scatterplot','sampleSizeTable', 'completeCasesTable')) expect_equal(names(jsonList$scatterplot),c('data','config')) - expect_equal(names(jsonList$scatterplot$data),c('facetVariableDetails','seriesX','seriesY','seriesGradientColorscale','correlationCoef','pValue')) + expect_equal(names(jsonList$scatterplot$data),c('facetVariableDetails','seriesX','seriesY','seriesGradientColorscale')) expect_equal(names(jsonList$scatterplot$data$facetVariableDetails[[1]]),c('variableId','entityId','value')) expect_equal(length(jsonList$scatterplot$data$facetVariableDetails), 4) expect_equal(jsonList$scatterplot$data$facetVariableDetails[[1]]$variableId, 'cat4') - expect_equal(names(jsonList$scatterplot$config),c('variables','completeCasesAllVars','completeCasesAxesVars','correlationMethod')) + expect_equal(names(jsonList$scatterplot$config),c('variables','completeCasesAllVars','completeCasesAxesVars')) expect_equal(names(jsonList$scatterplot$config$variables$variableSpec),c('variableId','entityId')) expect_equal(jsonList$scatterplot$config$variables$variableSpec$variableId[jsonList$scatterplot$config$variables$plotReference == 'overlay'], 'contC') expect_equal(names(jsonList$sampleSizeTable),c('facetVariableDetails','size')) @@ -988,11 +988,11 @@ test_that("scattergl() returns appropriately formatted json", { expect_equal(names(jsonList),c('scatterplot','sampleSizeTable', 'completeCasesTable')) expect_equal(names(jsonList$scatterplot),c('data','config')) - expect_equal(names(jsonList$scatterplot$data),c('facetVariableDetails','overlayVariableDetails','seriesX','seriesY','smoothedMeanX','smoothedMeanY','smoothedMeanSE','smoothedMeanError','correlationCoef','pValue')) + expect_equal(names(jsonList$scatterplot$data),c('facetVariableDetails','overlayVariableDetails','seriesX','seriesY','smoothedMeanX','smoothedMeanY','smoothedMeanSE','smoothedMeanError')) expect_equal(names(jsonList$scatterplot$data$facetVariableDetails[[1]]),c('variableId','entityId','value')) expect_equal(length(jsonList$scatterplot$data$facetVariableDetails), 24) expect_equal(jsonList$scatterplot$data$facetVariableDetails[[1]]$variableId, 'cat4') - expect_equal(names(jsonList$scatterplot$config),c('variables','completeCasesAllVars','completeCasesAxesVars','correlationMethod')) + expect_equal(names(jsonList$scatterplot$config),c('variables','completeCasesAllVars','completeCasesAxesVars')) expect_equal(names(jsonList$scatterplot$config$variables),c("variableClass","variableSpec","plotReference","dataType","dataShape","isCollection","imputeZero","hasStudyDependentVocabulary")) expect_equal(jsonList$scatterplot$config$variables$variableSpec$variableId, c("contB","contA","int6","cat4")) expect_equal(names(jsonList$sampleSizeTable),c('overlayVariableDetails','facetVariableDetails','size')) @@ -1043,9 +1043,9 @@ test_that("scattergl() returns appropriately formatted json", { expect_equal(names(jsonList),c('scatterplot','sampleSizeTable', 'completeCasesTable')) expect_equal(names(jsonList$scatterplot),c('data','config')) - expect_equal(names(jsonList$scatterplot$data),c('facetVariableDetails','overlayVariableDetails','seriesX','seriesY','correlationCoef','pValue')) + expect_equal(names(jsonList$scatterplot$data),c('overlayVariableDetails','facetVariableDetails','seriesX','seriesY')) expect_equal(names(jsonList$scatterplot$data$overlayVariableDetails),c('variableId','entityId','value')) - expect_equal(names(jsonList$scatterplot$config),c('variables','completeCasesAllVars','completeCasesAxesVars','correlationMethod')) + expect_equal(names(jsonList$scatterplot$config),c('variables','completeCasesAllVars','completeCasesAxesVars')) expect_equal(jsonList$scatterplot$config$completeCasesAllVars, nrow(df)) expect_equal(jsonList$scatterplot$config$completeCasesAxesVars, nrow(df)) expect_equal(names(jsonList$sampleSizeTable),c('overlayVariableDetails', 'facetVariableDetails','size')) @@ -1094,9 +1094,9 @@ test_that("scattergl() returns appropriately formatted json", { expect_equal(names(jsonList),c('scatterplot','sampleSizeTable', 'completeCasesTable')) expect_equal(names(jsonList$scatterplot),c('data','config')) - expect_equal(names(jsonList$scatterplot$data),c('facetVariableDetails','seriesX','seriesY','correlationCoef','pValue')) + expect_equal(names(jsonList$scatterplot$data),c('facetVariableDetails','seriesX','seriesY')) expect_equal(names(jsonList$scatterplot$data$facetVariableDetails[[1]]),c('variableId','entityId','value','displayLabel')) - expect_equal(names(jsonList$scatterplot$config),c('variables','completeCasesAllVars','completeCasesAxesVars','correlationMethod')) + expect_equal(names(jsonList$scatterplot$config),c('variables','completeCasesAllVars','completeCasesAxesVars')) expect_equal(jsonList$scatterplot$config$completeCasesAllVars, nrow(df)) expect_equal(jsonList$scatterplot$config$completeCasesAxesVars, nrow(df)) expect_equal(names(jsonList$sampleSizeTable),c('facetVariableDetails','size')) @@ -1112,9 +1112,9 @@ test_that("scattergl() returns appropriately formatted json", { expect_equal(names(jsonList),c('scatterplot')) expect_equal(names(jsonList$scatterplot),c('data','config')) - expect_equal(names(jsonList$scatterplot$data),c('facetVariableDetails','seriesX','seriesY','correlationCoef','pValue')) + expect_equal(names(jsonList$scatterplot$data),c('facetVariableDetails','seriesX','seriesY')) expect_equal(names(jsonList$scatterplot$data$facetVariableDetails[[1]]),c('variableId','entityId','value','displayLabel')) - expect_equal(names(jsonList$scatterplot$config),c('variables','correlationMethod')) + expect_equal(names(jsonList$scatterplot$config),c('variables')) @@ -1157,9 +1157,9 @@ test_that("scattergl() returns appropriately formatted json", { expect_equal(names(jsonList),c('scatterplot','sampleSizeTable', 'completeCasesTable')) expect_equal(names(jsonList$scatterplot),c('data','config')) - expect_equal(names(jsonList$scatterplot$data),c('facetVariableDetails','overlayVariableDetails','seriesX','seriesY','correlationCoef','pValue')) + expect_equal(names(jsonList$scatterplot$data),c('overlayVariableDetails','facetVariableDetails','seriesX','seriesY')) expect_equal(names(jsonList$scatterplot$data$overlayVariableDetails),c('variableId','entityId','value','displayLabel')) - expect_equal(names(jsonList$scatterplot$config),c('variables','completeCasesAllVars','completeCasesAxesVars','correlationMethod')) + expect_equal(names(jsonList$scatterplot$config),c('variables','completeCasesAllVars','completeCasesAxesVars')) expect_equal(jsonList$scatterplot$config$completeCasesAllVars, nrow(df)) expect_equal(jsonList$scatterplot$config$completeCasesAxesVars, nrow(df)) expect_equal(names(jsonList$sampleSizeTable),c('overlayVariableDetails', 'facetVariableDetails','size')) @@ -1338,7 +1338,7 @@ test_that("scattergl.dt() returns correct information about missing data", { dt <- scattergl.dt(df, variables, 'raw', evilMode = 'strataVariables') expect_equal(nrow(dt), 2) - expect_equal(names(dt), c('seriesX', 'seriesY', 'seriesGradientColorscale', 'correlationCoef', 'pValue')) + expect_equal(names(dt), c('seriesX', 'seriesY', 'seriesGradientColorscale')) expect_equal(lapply(dt$seriesGradientColorscale, length), lapply(dt$seriesX, length)) expect_true(all(is.na(dt$seriesGradientColorscale[[2]]))) @@ -1374,7 +1374,7 @@ test_that("scattergl.dt() returns correct information about missing data", { )) dt <- scattergl.dt(df, variables, 'raw', evilMode = 'strataVariables') - expect_equal(names(dt), c('entity.cat4', 'seriesX', 'seriesY', 'seriesGradientColorscale', 'correlationCoef', 'pValue')) + expect_equal(names(dt), c('entity.cat4', 'seriesX', 'seriesY', 'seriesGradientColorscale')) expect_equal(lapply(dt$seriesGradientColorscale, length), lapply(dt$seriesX, length)) expect_true(all(is.na(dt$seriesGradientColorscale[[2]]))) @@ -1384,12 +1384,12 @@ test_that("scattergl.dt() returns correct information about missing data", { expect_equal(names(jsonList),c('scatterplot','sampleSizeTable', 'completeCasesTable')) expect_equal(names(jsonList$scatterplot),c('data','config')) - expect_equal(names(jsonList$scatterplot$data),c('facetVariableDetails','seriesX','seriesY','seriesGradientColorscale','correlationCoef','pValue')) + expect_equal(names(jsonList$scatterplot$data),c('facetVariableDetails','seriesX','seriesY','seriesGradientColorscale')) expect_equal(names(jsonList$scatterplot$data$facetVariableDetails[[1]]),c('variableId','entityId','value')) expect_equal(length(jsonList$scatterplot$data$facetVariableDetails), 10) expect_equal(jsonList$scatterplot$data$facetVariableDetails[[1]]$variableId, 'cat4') expect_true(all(is.na(jsonList$scatterplot$data$seriesGradientColorscale[[2]]))) # should be null in the json file, but fromJSON converts to NA - expect_equal(names(jsonList$scatterplot$config),c('variables','completeCasesAllVars','completeCasesAxesVars','correlationMethod')) + expect_equal(names(jsonList$scatterplot$config),c('variables','completeCasesAllVars','completeCasesAxesVars')) expect_equal(names(jsonList$scatterplot$config$variables$variableSpec),c('variableId','entityId')) expect_equal(jsonList$scatterplot$config$variables$variableSpec$variableId[jsonList$scatterplot$config$variables$plotReference == 'overlay'], 'contC') expect_equal(names(jsonList$sampleSizeTable),c('facetVariableDetails','size')) From 1bc8b750ad7a0594992b0606a32518ad26c39f29 Mon Sep 17 00:00:00 2001 From: Danielle Callan Date: Wed, 1 May 2024 12:51:41 -0400 Subject: [PATCH 4/6] address review feedback --- R/class-plotdata-scatter.R | 36 ++++++++++++++++++++++-------------- R/group.R | 12 ++++++++---- 2 files changed, 30 insertions(+), 18 deletions(-) diff --git a/R/class-plotdata-scatter.R b/R/class-plotdata-scatter.R index 600dbea..c4903df 100644 --- a/R/class-plotdata-scatter.R +++ b/R/class-plotdata-scatter.R @@ -32,8 +32,7 @@ newScatterPD <- function(.dt = data.table::data.table(), panel <- findPanelColName(veupathUtils::findVariableSpecFromPlotRef(variables, 'facet1'), veupathUtils::findVariableSpecFromPlotRef(variables, 'facet2')) - dtForCorr <- data.table::copy(.pd) - attributes(dtForCorr)$class <- c('data.table', 'data.frame') + dtForCorr <- data.table::as.data.table(.pd) if (useGradientColorscale) { #series data w gradient @@ -43,16 +42,20 @@ newScatterPD <- function(.dt = data.table::data.table(), series$overlayMissingData <- NULL data.table::setnames(series, c(panel, 'seriesX', 'seriesY', 'seriesGradientColorscale')) - # corr results w gradient + # corr results w gradient, same as w/o groups so set group to NULL dtForCorr[[group]] <- NULL - corrResult <- groupCorrelation(dtForCorr, x, y, NULL, panel, correlationMethod = correlationMethod) + if (correlationMethod != 'none') { + corrResult <- groupCorrelation(dtForCorr, x, y, NULL, panel, correlationMethod = correlationMethod) + } } else { #series data w/o gradient series <- collapseByGroup(.pd, group, panel) data.table::setnames(series, c(group, panel, 'seriesX', 'seriesY')) # corr results w/o gradient - corrResult <- groupCorrelation(dtForCorr, x, y, group, panel, correlationMethod = correlationMethod) + if (correlationMethod != 'none') { + corrResult <- groupCorrelation(dtForCorr, x, y, group, panel, correlationMethod = correlationMethod) + } } veupathUtils::logWithTime('Calculated correlation results per group.', verbose) @@ -118,13 +121,16 @@ newScatterPD <- function(.dt = data.table::data.table(), .pd <- series } - if (!is.null(key(.pd))) { - .pd <- merge(.pd, corrResult) - } else { - .pd <- cbind(.pd, corrResult) + if (correlationMethod != 'none') { + if (!is.null(key(.pd))) { + .pd <- merge(.pd, corrResult) + } else { + .pd <- cbind(.pd, corrResult) + } + + attr$correlationMethod <- jsonlite::unbox(correlationMethod) } - attr$correlationMethod <- correlationMethod attr$names <- names(.pd) if (useGradientColorscale) attr$useGradientColorscale <- useGradientColorscale @@ -182,7 +188,8 @@ validateScatterPD <- function(.scatter, verbose) { #' to include raw data with smoothed mean. Note only 'raw' is compatible with a continuous #' overlay variable. #' @param overlayValues veupathUtils::BinList providing overlay values of interest -#' @param correlationMethod character indicating which correlation method to use. One of 'pearson', 'spearman' or 'sparcc'. +#' @param correlationMethod character indicating which correlation method to use. One of 'pearson', +#' 'spearman', 'sparcc' or 'none'. Default is 'none'. #' @param sampleSizes boolean indicating if sample sizes should be computed #' @param completeCases boolean indicating if complete cases should be computed #' @param evilMode String indicating how evil this plot is ('strataVariables', 'allVariables', 'noVariables') @@ -230,7 +237,7 @@ scattergl.dt <- function(data, 'density', 'raw'), overlayValues = NULL, - correlationMethod = c('pearson', 'sparcc', 'spearman'), + correlationMethod = c('none','pearson', 'sparcc', 'spearman'), sampleSizes = c(TRUE, FALSE), completeCases = c(TRUE, FALSE), evilMode = c('noVariables', 'allVariables', 'strataVariables'), @@ -345,7 +352,8 @@ scattergl.dt <- function(data, #' 'density' estimates (no raw data returned), alternatively 'smoothedMeanWithRaw' to include raw #' data with smoothed mean. Note only 'raw' is compatible with a continuous overlay variable. #' @param overlayValues veupathUtils::BinList providing overlay values of interest -#' @param correlationMethod character indicating which correlation method to use. One of 'pearson', 'spearman' or 'sparcc'. +#' @param correlationMethod character indicating which correlation method to use. One of 'pearson', +#' 'spearman','sparcc' or 'none'. Default is 'none'. #' @param sampleSizes boolean indicating if sample sizes should be computed #' @param completeCases boolean indicating if complete cases should be computed #' @param evilMode String indicating how evil this plot is ('strataVariables', 'allVariables', 'noVariables') @@ -393,7 +401,7 @@ scattergl <- function(data, 'density', 'raw'), overlayValues = NULL, - correlationMethod = c('pearson', 'sparcc', 'spearman'), + correlationMethod = c('none','pearson', 'sparcc', 'spearman'), sampleSizes = c(TRUE, FALSE), completeCases = c(TRUE, FALSE), evilMode = c('noVariables', 'allVariables', 'strataVariables'), diff --git a/R/group.R b/R/group.R index 3a607fd..b26d434 100644 --- a/R/group.R +++ b/R/group.R @@ -182,15 +182,19 @@ groupCorrelation <- function( ) { veupathUtils::matchArg(correlationMethod) + if (length(dt) > 2) { + stop('Correlation can only be computed for two variables.') + } + byCols <- colnames(data)[colnames(data) %in% c(group, geo, panel)] if (all(is.null(c(group,geo,panel)))) { dt <- data[, {corrResult <- correlationOrEmpty(.SD, method = correlationMethod); - list(correlationCoef = corrResult[[3]], - pValue = corrResult[[4]])}] + list(correlationCoef = corrResult$correlationCoef, + pValue = corrResult$pValue)}] } else { dt <- data[, {corrResult <- correlationOrEmpty(.SD, method = correlationMethod); - list(correlationCoef = corrResult[[3]], - pValue = corrResult[[4]])}, + list(correlationCoef = corrResult$correlationCoef, + pValue = corrResult$pValue)}, keyby=eval(byCols)] } From 412ed7080bfebd8d9c56cabf58deb800aeb0df54 Mon Sep 17 00:00:00 2001 From: Danielle Callan Date: Wed, 1 May 2024 12:51:55 -0400 Subject: [PATCH 5/6] update docs --- man/scattergl.Rd | 4 ++++ man/scattergl.dt.Rd | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/man/scattergl.Rd b/man/scattergl.Rd index 1de66e5..35c62fc 100644 --- a/man/scattergl.Rd +++ b/man/scattergl.Rd @@ -10,6 +10,7 @@ scattergl( value = c("smoothedMean", "smoothedMeanWithRaw", "bestFitLineWithRaw", "density", "raw"), overlayValues = NULL, + correlationMethod = c("none", "pearson", "sparcc", "spearman"), sampleSizes = c(TRUE, FALSE), completeCases = c(TRUE, FALSE), evilMode = c("noVariables", "allVariables", "strataVariables"), @@ -29,6 +30,9 @@ data with smoothed mean. Note only 'raw' is compatible with a continuous overlay \item{overlayValues}{veupathUtils::BinList providing overlay values of interest} +\item{correlationMethod}{character indicating which correlation method to use. One of 'pearson', +'spearman','sparcc' or 'none'. Default is 'none'.} + \item{sampleSizes}{boolean indicating if sample sizes should be computed} \item{completeCases}{boolean indicating if complete cases should be computed} diff --git a/man/scattergl.dt.Rd b/man/scattergl.dt.Rd index c5c196d..985896b 100644 --- a/man/scattergl.dt.Rd +++ b/man/scattergl.dt.Rd @@ -10,6 +10,7 @@ scattergl.dt( value = c("smoothedMean", "smoothedMeanWithRaw", "bestFitLineWithRaw", "density", "raw"), overlayValues = NULL, + correlationMethod = c("none", "pearson", "sparcc", "spearman"), sampleSizes = c(TRUE, FALSE), completeCases = c(TRUE, FALSE), evilMode = c("noVariables", "allVariables", "strataVariables"), @@ -32,6 +33,9 @@ overlay variable.} \item{overlayValues}{veupathUtils::BinList providing overlay values of interest} +\item{correlationMethod}{character indicating which correlation method to use. One of 'pearson', +'spearman', 'sparcc' or 'none'. Default is 'none'.} + \item{sampleSizes}{boolean indicating if sample sizes should be computed} \item{completeCases}{boolean indicating if complete cases should be computed} From 0cb5f6c4042eb35ace4dc5f61f060a3ad0c92303 Mon Sep 17 00:00:00 2001 From: Danielle Callan Date: Wed, 1 May 2024 12:52:04 -0400 Subject: [PATCH 6/6] get tests passing again --- tests/testthat/test-scattergl.R | 37 +++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/tests/testthat/test-scattergl.R b/tests/testthat/test-scattergl.R index 58a342f..4ba70b4 100644 --- a/tests/testthat/test-scattergl.R +++ b/tests/testthat/test-scattergl.R @@ -43,6 +43,12 @@ test_that("scatter.dt does not fail when there are no complete cases.", { expect_equal(attr$completeCasesAllVars[1], 0) expect_equal(is.list(dt$densityX), TRUE) expect_equal(is.list(dt$densityY), TRUE) + + dt <- scattergl.dt(df, variables, value='raw', correlationMethod = 'pearson') + attr <- attributes(dt) + expect_equal(attr$completeCasesAllVars[1], 0) + expect_equal(is.list(dt$seriesX), TRUE) + expect_equal(is.list(dt$seriesY), TRUE) variables <- new("VariableMetadataList", SimpleList( new("VariableMetadata", @@ -129,6 +135,14 @@ test_that("scattergl.dt() returns a valid plot.data scatter object", { namedAttrList <- getPDAttributes(dt) expect_equal(names(namedAttrList),c('variables')) + # make sure correlation coef and pvalue is returned if there is a correlationMethod + dt <- scattergl.dt(df, variables, 'raw', correlationMethod = 'pearson') + expect_is(dt, 'plot.data') + expect_is(dt, 'scatterplot') + namedAttrList <- getPDAttributes(dt) + expect_equal(names(namedAttrList),c('variables', 'completeCasesAllVars','completeCasesAxesVars','completeCasesTable','sampleSizeTable','correlationMethod')) + expect_equal(length(namedAttrList$correlationMethod), 1) + }) test_that("scattergl.dt() returns plot data and config of the appropriate types", { @@ -180,6 +194,13 @@ test_that("scattergl.dt() returns plot data and config of the appropriate types" expect_equal(class(unlist(sampleSizes$entity.cat4)), 'character') expect_equal(class(unlist(sampleSizes$size)), 'integer') + # check types of correlation results when there is a correlationMethod + dt <- scattergl.dt(df, variables, 'raw', correlationMethod = 'pearson') + expect_equal(class(dt$correlationCoef), 'numeric') + expect_equal(class(dt$pValue), 'numeric') + namedAttrList <- getPDAttributes(dt) + expect_equal(class(namedAttrList$correlationMethod),c('scalar', 'character')) + variables <- new("VariableMetadataList", SimpleList( new("VariableMetadata", variableClass = new("VariableClass", value = 'native'), @@ -282,6 +303,12 @@ test_that("scattergl.dt() returns an appropriately sized data.table", { expect_equal(nrow(dt),12) expect_equal(names(dt),c('entity.cat4', 'entity.cat3', 'seriesX', 'seriesY', 'bestFitLineX', 'bestFitLineY', 'r2')) + # should see some new cols if we have a correlationMethod + dt <- scattergl.dt(df, variables, 'bestFitLineWithRaw', correlationMethod = 'pearson') + expect_is(dt, 'data.table') + expect_equal(nrow(dt),12) + expect_equal(names(dt),c('entity.cat4', 'entity.cat3', 'seriesX', 'seriesY', 'bestFitLineX', 'bestFitLineY', 'r2', 'correlationCoef', 'pValue')) + variables <- new("VariableMetadataList", SimpleList( new("VariableMetadata", variableClass = new("VariableClass", value = 'native'), @@ -900,6 +927,16 @@ test_that("scattergl() returns appropriately formatted json", { expect_equal(names(jsonList$completeCasesTable$variableDetails), c('variableId','entityId')) expect_equal(jsonList$completeCasesTable$variableDetails$variableId, c('contA', 'contB', 'cat3', 'cat4')) + # check json for correlations when correlationMethod is not none + dt <- scattergl.dt(df, variables, 'smoothedMeanWithRaw', correlationMethod = 'pearson') + outJson <- getJSON(dt, FALSE) + jsonList <- jsonlite::fromJSON(outJson) + expect_equal(names(jsonList$scatterplot$data),c('facetVariableDetails','overlayVariableDetails','seriesX','seriesY','smoothedMeanX','smoothedMeanY','smoothedMeanSE','smoothedMeanError','correlationCoef','pValue')) + expect_equal(names(jsonList$scatterplot$config), c('variables','completeCasesAllVars','completeCasesAxesVars','correlationMethod')) + expect_equal(jsonList$scatterplot$config$correlationMethod, 'pearson') + expect_equal(class(jsonList$scatterplot$data$correlationCoef), 'numeric') + expect_equal(class(jsonList$scatterplot$data$pValue), 'numeric') + # Continuous overlay with > 8 values variables <- new("VariableMetadataList", SimpleList( new("VariableMetadata",