From aa27620879126e4239a87de23538b49a92094545 Mon Sep 17 00:00:00 2001 From: Ian Date: Thu, 13 Feb 2025 21:26:22 +0000 Subject: [PATCH] minor fixes to plotting - not the big commit it looks like --- fireSense_IgnitionFit.R | 306 ++++++++++++++++++++-------------------- 1 file changed, 153 insertions(+), 153 deletions(-) diff --git a/fireSense_IgnitionFit.R b/fireSense_IgnitionFit.R index 38853fb..b772eee 100644 --- a/fireSense_IgnitionFit.R +++ b/fireSense_IgnitionFit.R @@ -156,7 +156,7 @@ frequencyFitRun <- function(sim) { fireSense_ignitionFormula <- as.formula(sim$fireSense_ignitionFormula, env = .GlobalEnv) terms <- terms.formula(fireSense_ignitionFormula) - # Check the presence of at least one piecewise term + fireSense_ignitionCovariates <- sim$fireSense_ignitionCovariates fireSense_ignitionCovariates <- copy(setDT(fireSense_ignitionCovariates)) @@ -166,15 +166,12 @@ frequencyFitRun <- function(sim) { stop(moduleName, "> Incomplete formula, the LHS is missing.") } - ## any years in data? if (any(c("year", "yr") %in% tolower(names(fireSense_ignitionCovariates)))) { xvar <- intersect(c("year", "yr"), tolower(names(fireSense_ignitionCovariates))) } else { xvar <- rows } - ## rescale variable and knots. - #this should always happen if (isTRUE(P(sim)$rescaleVars)) { # rescalers <- abs(sapply(fireSense_ignitionCovariates[, .SD, .SDcol = toRescale], FUN = max)) @@ -267,185 +264,188 @@ frequencyFitRun <- function(sim) { ## https://stackoverflow.com/a/68684973 -- NOT VERY APPROPRIATE FOR RE model pseudoR2 <- as.numeric(1 - logLik(bestModel) / logLik(nullModel)) - - #### Plotting glmmTMB #### - ## build two prediction datasets - ## both predict across quantiles of climate variable - ## the first dataset will have class means for cover and forest biomass - ## the second will have cover values of 1 (i.e. representing complete cover for non-forest) - ## and biomass values representing the mean for forested pixels (i.e. complete cover of forest) - ## the 2nd dataset is different because the first one, while more representative of the actual landscape, + if (terms(nullModel) != terms(bestModel)) { + #### Plotting glmmTMB #### + ## build two prediction datasets + ## both predict across quantiles of climate variable + ## the first dataset will have class means for cover and forest biomass + ## the second will have cover values of 1 (i.e. representing complete cover for non-forest) + ## and biomass values representing the mean for forested pixels (i.e. complete cover of forest) + ## the 2nd dataset is different because the first one, while more representative of the actual landscape, ## implicitly includes non-forest pixels in the calculation of the mean. - N <- 30 - p <- as.data.table(lapply(m, function(pp) if (is.numeric(pp)) mean(pp) else pp[1:N])) - terms <- terms(bestModel) - termsNonClimate <- setdiff(attr(terms, "term.labels"), climVar) - - ## populate a prediction dataset with quantiles of climate variable and mutually exclusive veg. - for (var in climVar) { - interpolateClimVar <- seq(quantile(m[[var]], 0.1), - (quantile(m[[var]], 0.95) * 1.5), length.out = N) - set(p, NULL, var, interpolateClimVar) - - ## if more than 1 climate variable are used, they are plotted sequentially - ## each prediction dataset will contain quantiles of one variable and mean of the other(s) - otherClimVar <- climVar[!climVar %in% var] - if (length(otherClimVar) > 0) { - for (otherVar in otherClimVar) { - set(p, NULL, otherVar, mean(m[[otherVar]])) + N <- 30 + p <- as.data.table(lapply(m, function(pp) if (is.numeric(pp)) mean(pp) else pp[1:N])) + terms <- terms(bestModel) + termsNonClimate <- setdiff(attr(terms, "term.labels"), climVar) + + ## populate a prediction dataset with quantiles of climate variable and mutually exclusive veg. + for (var in climVar) { + interpolateClimVar <- seq(quantile(m[[var]], 0.1), + (quantile(m[[var]], 0.95) * 1.5), length.out = N) + set(p, NULL, var, interpolateClimVar) + + ## if more than 1 climate variable are used, they are plotted sequentially + ## each prediction dataset will contain quantiles of one variable and mean of the other(s) + otherClimVar <- climVar[!climVar %in% var] + if (length(otherClimVar) > 0) { + for (otherVar in otherClimVar) { + set(p, NULL, otherVar, mean(m[[otherVar]])) + } } - } - - termsNoInteraction <- termsNonClimate[termsNonClimate %in% names(m)] - if (length(termsNoInteraction) == 0) { #all terms are interactions between fuel and climate - termsWithInteraction <- attr(terms, "term.labels") - termsNoInteraction <- sub(termsWithInteraction, pattern = climVar, replacement = "") |> - sub(pattern = ":", replacement = "") #to catch ':' or ':' - } - - #remove columns that aren't model terms - don't use set diff b/c some terms are only interaction - unneededCovs <- setdiff(colnames(p), c(termsNoInteraction, climVar)) - for (rmCol in unneededCovs) { - set(p, NULL, rmCol, NULL) - } - - pAll <- rbindlist(lapply(seq(termsNoInteraction), function(x) p)) - pAll[, val := rep(termsNoInteraction, each = N)] - - termsUsingCover <- as.vector(m[, lapply(.SD, max), .SDcol = termsNoInteraction]) - termsUsingBiomass <- names(termsUsingCover[termsUsingCover > 1]) - termsUsingCover <- setdiff(names(termsUsingCover), termsUsingBiomass) - for (val1 in c(termsUsingCover)) { - set(pAll, which(!pAll$val %in% val1), val1, 0) - } + termsNoInteraction <- termsNonClimate[termsNonClimate %in% names(m)] + if (length(termsNoInteraction) == 0) { #all terms are interactions between fuel and climate + termsWithInteraction <- attr(terms, "term.labels") + termsNoInteraction <- sub(termsWithInteraction, pattern = climVar, replacement = "") |> + sub(pattern = ":", replacement = "") #to catch ':' or ':' + } - #set minimum biomass as whatever is in data (likely log(100)-1) - minBiomass <- min(m[, .SD, .SDcol = termsUsingBiomass]) - for (val1 in c(termsUsingCover)) { - set(pAll, which(!pAll$val %in% val1), val1, minBiomass) - } + #remove columns that aren't model terms - don't use set diff b/c some terms are only interaction + unneededCovs <- setdiff(colnames(p), c(termsNoInteraction, climVar)) + for (rmCol in unneededCovs) { + set(p, NULL, rmCol, NULL) + } + pAll <- rbindlist(lapply(seq(termsNoInteraction), function(x) p)) + pAll[, val := rep(termsNoInteraction, each = N)] - #TODO: caching preds is not currently working with reproducible 2.1.2 or 2.1.2.9007 (recursion error) - system.time({ - preds <- predict(object = bestModel, newdata = pAll, se.fit = TRUE, re.form = NA) - }) - pAll[, pred := expit(preds$fit)] - pAll[, val1 := factor(val)] - pAll[, upper := expit(preds$fit + preds$se.fit)] - pAll[, lower := expit(preds$fit - preds$se.fit)] - - resInKm2 <- prod(res(sim$ignitionFitRTM)) / 1e6 ## 1e6 m^2 == 1 km^2 - labelToUse <- paste("Ignition rate per", resInKm2, "km^2") - filenameToUse <- paste0("IgnitionRatePer", resInKm2, "km2_", - P(sim)$.studyAreaName, "_meanByClass_", climVar) - - titl <- paste0("fireSense_IgnitionFit:", P(sim)$.studyAreaName, - " (", basename(outputPath(sim)), ")", - " -- Pseudo ") - titl2 <- paste0(round(pseudoR2, 3)) - if (isTRUE(P(sim)$rescaleVars)) { - pAll <- rescaleVars(pAll, 1/sim$ignitionRescalers) # invert it - } + termsUsingCover <- as.vector(m[, lapply(.SD, max), .SDcol = termsNoInteraction]) + termsUsingBiomass <- names(termsUsingCover[termsUsingCover > 1]) + termsUsingCover <- setdiff(names(termsUsingCover), termsUsingBiomass) - Plots(data = pAll, fn = plotFnLogitIgnition, # xColName = colName, - ggylab = labelToUse, - subtitle = "using mean cover and biomass per pixel", - fillTitle = "veg. covariate", - .plotInitialTime = NULL, # this means "ignore what `.plotInitialTime says; use only .plots` - # centred = centred, - climateVar = var, #TODO: fix to allow multiple climate variables - # origXmax = max(sim$fireSense_ignitionCovariates[[colName]]), ## if supplied, adds bar to plot - ggTitle = bquote(.(titl)~R^2 == .(titl2)), - rawClimate = m[[var]], - filename = filenameToUse) - - ## make second prediction using mean forest or alternatively 100% non-forest cover - - if (!is.null(attributes(sim$ignitionFitRTM)$meanForestB) || - !is.null(P(sim)$plot_fuelBiomassPerPrediction)) { - - Bunit <- ifelse(!is.null(P(sim)$plot_fuelBiomassPerPrediction), - P(sim)$plot_fuelBiomassPerPrediction, #should be log already - log(attributes(sim$ignitionFitRTM)$meanForestB)) - BunitForLabel <- round(exp(Bunit), digits = 0) - pAll2 <- copy(pAll) - - for (val1 in termsUsingBiomass) { - set(pAll2, which(pAll2$val %in% val1), val1, Bunit) + for (val1 in c(termsUsingCover)) { + set(pAll, which(!pAll$val %in% val1), val1, 0) } - for (val1 in termsUsingCover) { - set(pAll2, which(pAll2$val %in% val1), val1, 1) #set the variable to 1 representing complete cover + #set minimum biomass as whatever is in data (likely log(100)-1) + minBiomass <- min(m[, .SD, .SDcol = termsUsingBiomass]) + for (val1 in c(termsUsingCover)) { + set(pAll, which(!pAll$val %in% val1), val1, minBiomass) } + #TODO: caching preds is not currently working with reproducible 2.1.2 or 2.1.2.9007 (recursion error) system.time({ - preds <- predict(object= bestModel, newdata = pAll2, se.fit = TRUE, re.form = NA)# |> - # Cache(omitArgs = "object", .cacheExtra = forms[whBest]) + preds <- predict(object = bestModel, newdata = pAll, se.fit = TRUE, re.form = NA) }) + pAll[, pred := expit(preds$fit)] + pAll[, val1 := factor(val)] + pAll[, upper := expit(preds$fit + preds$se.fit)] + pAll[, lower := expit(preds$fit - preds$se.fit)] + + resInKm2 <- prod(res(sim$ignitionFitRTM)) / 1e6 ## 1e6 m^2 == 1 km^2 + labelToUse <- paste("Ignition rate per", resInKm2, "km^2") + filenameToUse <- paste0("IgnitionRatePer", resInKm2, "km2_", + P(sim)$.studyAreaName, "_meanByClass_", climVar) + + titl <- paste0("fireSense_IgnitionFit:", P(sim)$.studyAreaName, + " (", basename(outputPath(sim)), ")", + " -- Pseudo ") + titl2 <- paste0(round(pseudoR2, 3)) + if (isTRUE(P(sim)$rescaleVars)) { + pAll <- rescaleVars(pAll, 1/sim$ignitionRescalers) # invert it + } - pAll2[, pred := expit(preds$fit)] - pAll2[, val1 := factor(val)] - pAll2[, upper := expit(preds$fit + preds$se.fit)] - pAll2[, lower := expit(preds$fit - preds$se.fit)] - - filenameToUse <- paste0("IgnitionRatePer", resInKm2, "km2_", P(sim)$.studyAreaName, "_fullCoverAndBiomass_", climVar) - Plots(data = pAll2, fn = plotFnLogitIgnition, + Plots(data = pAll, fn = plotFnLogitIgnition, # xColName = colName, ggylab = labelToUse, - subtitle = paste0("per ", BunitForLabel, " g B/m2 or 100% cover"), + subtitle = "using mean cover and biomass per pixel", fillTitle = "veg. covariate", .plotInitialTime = NULL, # this means "ignore what `.plotInitialTime says; use only .plots` + # centred = centred, climateVar = var, - rawClimate = m[[var]], # origXmax = max(sim$fireSense_ignitionCovariates[[colName]]), ## if supplied, adds bar to plot ggTitle = bquote(.(titl)~R^2 == .(titl2)), + rawClimate = m[[var]], filename = filenameToUse) - } - } - #TODO: caching preds is not currently working with reproducible 2.1.2 or 2.1.2.9007 (recursion error) - system.time({ - fittedNoRE <- predict(object = bestModel, newdata = m, se.fit = FALSE, re.form = NA, - type = "response") #|> - # Cache(.functionName = "predict_forFitted_v_Obs_Ignitions", - # omitArgs = "object", .cacheExtra = forms[whBest]) - }) - - # fittedVals <- fitted(bestModel) - plotData <- data.table(fireSense_ignitionCovariates) - plotData[, rows := 1:nrow(plotData)] - cols <- unique(c(paste(y), xvar, "rows")) - plotData <- plotData[, ..cols] - plotData <- cbind(plotData, fittedNoRE = fittedNoRE) + ## make second prediction using mean forest or alternatively 100% non-forest cover - predDT <- rbindlist(lapply(1:100, FUN = function(x, DT) { - rpoisPred <- rpois(nrow(DT), lambda = DT$fittedNoRE) - n <- rep(x, nrow(DT)) - data.table(rpoisPred = rpoisPred, n = n, rows = DT$rows) - }, DT = plotData)) + if (!is.null(attributes(sim$ignitionFitRTM)$meanForestB) || + !is.null(P(sim)$plot_fuelBiomassPerPrediction)) { - plotData <- plotData[predDT, on = "rows"] + Bunit <- ifelse(!is.null(P(sim)$plot_fuelBiomassPerPrediction), + P(sim)$plot_fuelBiomassPerPrediction, + log(attributes(sim$ignitionFitRTM)$meanForestB)) + BunitForLabel <- round(exp(Bunit), digits = 0) + pAll2 <- copy(pAll) - plotData <- plotData[, list(obsFires = sum(eval(y), na.rm = TRUE), - predFires = sum(rpoisPred, na.rm = TRUE)), - by = c(xvar, "n")] - plotData[, obsFires := as.integer(obsFires)] - plotData[, predFires := as.integer(predFires)] + for (val1 in termsUsingBiomass) { + set(pAll2, which(pAll2$val %in% val1), val1, Bunit) + } - pd <- plotData[, .(obsFires = mean(obsFires), predFires = mean(predFires)), .(year)] - correl <- cor(pd$obsFires, pd$predFires) + for (val1 in termsUsingCover) { + set(pAll2, which(pAll2$val %in% val1), val1, 1)#set the variable to 1 representing complete cover + } - plotData <- melt(plotData, id.var = c(xvar, "n")) + #TODO: caching preds is not currently working with reproducible 2.1.2 or 2.1.2.9007 (recursion error) + system.time({ + preds <- predict(object= bestModel, newdata = pAll2, se.fit = TRUE, re.form = NA)# |> + # Cache(omitArgs = "object", .cacheExtra = forms[whBest]) + }) + + pAll2[, pred := expit(preds$fit)] + pAll2[, val1 := factor(val)] + pAll2[, upper := expit(preds$fit + preds$se.fit)] + pAll2[, lower := expit(preds$fit - preds$se.fit)] + + filenameToUse <- paste0("IgnitionRatePer", resInKm2, "km2_", P(sim)$.studyAreaName, "_fullCoverAndBiomass_", climVar) + Plots(data = pAll2, fn = plotFnLogitIgnition, + ggylab = labelToUse, + subtitle = paste0("per ", BunitForLabel, " g B/m2 or 100% cover"), + fillTitle = "veg. covariate", + .plotInitialTime = NULL, # this means "ignore what `.plotInitialTime says; use only .plots` + climateVar = var, + rawClimate = m[[var]], + # origXmax = max(sim$fireSense_ignitionCovariates[[colName]]), ## if supplied, adds bar to plot + ggTitle = bquote(.(titl)~R^2 == .(titl2)), + filename = filenameToUse) + } + } + #TODO: caching preds is not currently working with reproducible 2.1.2 or 2.1.2.9007 (recursion error) + system.time({ + fittedNoRE <- predict(object = bestModel, newdata = m, se.fit = FALSE, re.form = NA, + type = "response") #|> + # Cache(.functionName = "predict_forFitted_v_Obs_Ignitions", + # omitArgs = "object", .cacheExtra = forms[whBest]) + }) - Plots(data = plotData, fn = fittedVsObservedPlot, - xColName = xvar, .plotInitialTime = NULL, - ggylab = "num. fires", - ggTitle = paste(P(sim)$.studyAreaName, "fireSense_IgnitionFit: obs. vs. fit"), - ggSubtitle = paste0("Correlation = ", round(correl, 2)), - filename = paste0("ignition_NumFiresFitted_", P(sim)$.studyAreaName)) + # fittedVals <- fitted(bestModel) + + plotData <- data.table(fireSense_ignitionCovariates) + plotData[, rows := 1:nrow(plotData)] + cols <- unique(c(paste(y), xvar, "rows")) + plotData <- plotData[, ..cols] + plotData <- cbind(plotData, fittedNoRE = fittedNoRE) + + predDT <- rbindlist(lapply(1:100, FUN = function(x, DT) { + rpoisPred <- rpois(nrow(DT), lambda = DT$fittedNoRE) + n <- rep(x, nrow(DT)) + data.table(rpoisPred = rpoisPred, n = n, rows = DT$rows) + }, DT = plotData)) + + plotData <- plotData[predDT, on = "rows"] + + plotData <- plotData[, list(obsFires = sum(eval(y), na.rm = TRUE), + predFires = sum(rpoisPred, na.rm = TRUE)), + by = c(xvar, "n")] + plotData[, obsFires := as.integer(obsFires)] + plotData[, predFires := as.integer(predFires)] + + pd <- plotData[, .(obsFires = mean(obsFires), predFires = mean(predFires)), .(year)] + correl <- cor(pd$obsFires, pd$predFires) + + plotData <- melt(plotData, id.var = c(xvar, "n")) + + Plots(data = plotData, fn = fittedVsObservedPlot, + xColName = xvar, .plotInitialTime = NULL, + ggylab = "num. fires", + ggTitle = paste(P(sim)$.studyAreaName, "fireSense_IgnitionFit: obs. vs. fit"), + ggSubtitle = paste0("Correlation = ", round(correl, 2)), + filename = paste0("ignition_NumFiresFitted_", P(sim)$.studyAreaName)) + } else { + message("null ignition model was superior therefore no plotting will occur - please review formula") + } } origNoPix <- attributes(sim$ignitionFitRTM)$nonNAs ## nrow(preSampleData) in eg above